From: Apple
Date: Thu, 4 Oct 2018 22:01:40 +0000 (+0000)
Subject: libpthread-330.201.1.tar.gz
X-Git-Tag: macos-1014^0
X-Git-Url: https://git.saurik.com/apple/libpthread.git/commitdiff_plain/214d78a2e71d35948bb3c390fec58031c3f0611b?ds=sidebyside

libpthread-330.201.1.tar.gz
---

diff --git a/kern/kern_init.c b/kern/kern_init.c
index 3de9b5d..3321483 100644
--- a/kern/kern_init.c
+++ b/kern/kern_init.c
@@ -17,21 +17,12 @@ pthread_callbacks_t pthread_kern;
 
 const struct pthread_functions_s pthread_internal_functions = {
 .pthread_init = _pthread_init,
- .fill_procworkqueue = (int(*)(proc_t, void*))_fill_procworkqueue,
- .get_pwq_state_kdp = _get_pwq_state_kdp,
- .workqueue_exit = _workqueue_exit,
- .workqueue_mark_exiting = _workqueue_mark_exiting,
- .workqueue_thread_yielded = _workqueue_thread_yielded,
- .workqueue_get_sched_callback = _workqueue_get_sched_callback,
 .pth_proc_hashinit = _pth_proc_hashinit,
 .pth_proc_hashdelete = _pth_proc_hashdelete,
 .bsdthread_create = _bsdthread_create,
 .bsdthread_register = _bsdthread_register,
 .bsdthread_terminate = _bsdthread_terminate,
- .bsdthread_ctl = _bsdthread_ctl,
 .thread_selfid = _thread_selfid,
- .workq_kernreturn = _workq_kernreturn,
- .workq_open = _workq_open,
 
 .psynch_mutexwait = _psynch_mutexwait,
 .psynch_mutexdrop = _psynch_mutexdrop,
@@ -48,12 +39,11 @@ const struct pthread_functions_s pthread_internal_functions = {
 .pthread_find_owner = _pthread_find_owner,
 .pthread_get_thread_kwq = _pthread_get_thread_kwq,
 
- .workq_reqthreads = _workq_reqthreads,
- .thread_qos_from_pthread_priority = _thread_qos_from_pthread_priority,
- .pthread_priority_canonicalize2 = _pthread_priority_canonicalize,
- .workq_thread_has_been_unbound = _workq_thread_has_been_unbound,
- .workq_threadreq = workq_kern_threadreq,
- .workq_threadreq_modify = workq_kern_threadreq_modify,
+ .workq_create_threadstack = workq_create_threadstack,
+ .workq_destroy_threadstack = workq_destroy_threadstack,
+ .workq_setup_thread = workq_setup_thread,
+ .workq_handle_stack_events = workq_handle_stack_events,
+ .workq_markfree_threadstack = workq_markfree_threadstack,
 };
 
 kern_return_t pthread_start(__unused kmod_info_t * ki, __unused void *d)
diff --git a/kern/kern_internal.h b/kern/kern_internal.h
index fa2c27b..bb29cdc 100644
--- a/kern/kern_internal.h
+++ b/kern/kern_internal.h
@@ -29,7 +29,12 @@
 #ifndef _SYS_PTHREAD_INTERNAL_H_
 #define _SYS_PTHREAD_INTERNAL_H_
 
+#include 
+#include 
+#include 
+
 #ifdef KERNEL
+struct ksyn_waitq_element;
 #include 
 #include 
 #include 
@@ -64,101 +69,6 @@
 #define PTHREAD_FEATURE_WORKLOOP 0x80 /* supports workloops */
 #define PTHREAD_FEATURE_QOS_DEFAULT 0x40000000 /* the kernel supports QOS_CLASS_DEFAULT */
 
-/* pthread bsdthread_ctl sysctl commands */
-#define BSDTHREAD_CTL_SET_QOS 0x10 /* bsdthread_ctl(BSDTHREAD_CTL_SET_QOS, thread_port, tsd_entry_addr, 0) */
-#define BSDTHREAD_CTL_GET_QOS 0x20 /* bsdthread_ctl(BSDTHREAD_CTL_GET_QOS, thread_port, 0, 0) */
-#define BSDTHREAD_CTL_QOS_OVERRIDE_START 0x40 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_START, thread_port, priority, 0) */
-#define BSDTHREAD_CTL_QOS_OVERRIDE_END 0x80 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_END, thread_port, 0, 0) */
-#define BSDTHREAD_CTL_SET_SELF 0x100 /* bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, priority, voucher, flags) */
-#define BSDTHREAD_CTL_QOS_OVERRIDE_RESET 0x200 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_RESET, 0, 0, 0) */
-#define BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH 0x400 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH, thread_port, priority, 0) */
-#define
BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD 0x401 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD, thread_port, priority, resource) */ -#define BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET 0x402 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET, 0|1 (?reset_all), resource, 0) */ -#define BSDTHREAD_CTL_QOS_MAX_PARALLELISM 0x800 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_MAX_PARALLELISM, priority, flags, 0) */ - -/* qos_class_t is mapped into one of these bits in the bitfield, this mapping now exists here because - * libdispatch requires the QoS class mask of the pthread_priority_t to be a bitfield. - */ -#define __PTHREAD_PRIORITY_CBIT_USER_INTERACTIVE 0x20 -#define __PTHREAD_PRIORITY_CBIT_USER_INITIATED 0x10 -#define __PTHREAD_PRIORITY_CBIT_DEFAULT 0x8 -#define __PTHREAD_PRIORITY_CBIT_UTILITY 0x4 -#define __PTHREAD_PRIORITY_CBIT_BACKGROUND 0x2 -#define __PTHREAD_PRIORITY_CBIT_MAINTENANCE 0x1 -#define __PTHREAD_PRIORITY_CBIT_UNSPECIFIED 0x0 - -static inline int -_pthread_qos_class_to_thread_qos(qos_class_t qos) -{ - switch (qos) { - case QOS_CLASS_USER_INTERACTIVE: return THREAD_QOS_USER_INTERACTIVE; - case QOS_CLASS_USER_INITIATED: return THREAD_QOS_USER_INITIATED; - case QOS_CLASS_DEFAULT: return THREAD_QOS_LEGACY; - case QOS_CLASS_UTILITY: return THREAD_QOS_UTILITY; - case QOS_CLASS_BACKGROUND: return THREAD_QOS_BACKGROUND; - case QOS_CLASS_MAINTENANCE: return THREAD_QOS_MAINTENANCE; - default: return THREAD_QOS_UNSPECIFIED; - } -} - -static inline pthread_priority_t -_pthread_priority_make_newest(qos_class_t qc, int rel, unsigned long flags) -{ - pthread_priority_t cls; - switch (qc) { - case QOS_CLASS_USER_INTERACTIVE: cls = __PTHREAD_PRIORITY_CBIT_USER_INTERACTIVE; break; - case QOS_CLASS_USER_INITIATED: cls = __PTHREAD_PRIORITY_CBIT_USER_INITIATED; break; - case QOS_CLASS_DEFAULT: cls = __PTHREAD_PRIORITY_CBIT_DEFAULT; break; - case QOS_CLASS_UTILITY: cls = __PTHREAD_PRIORITY_CBIT_UTILITY; break; - case QOS_CLASS_BACKGROUND: cls = __PTHREAD_PRIORITY_CBIT_BACKGROUND; break; - case QOS_CLASS_MAINTENANCE: cls = __PTHREAD_PRIORITY_CBIT_MAINTENANCE; break; - case QOS_CLASS_UNSPECIFIED: - default: - cls = __PTHREAD_PRIORITY_CBIT_UNSPECIFIED; - rel = 1; // results in priority bits == 0 - break; - } - - pthread_priority_t p = - (flags & _PTHREAD_PRIORITY_FLAGS_MASK) | - ((cls << _PTHREAD_PRIORITY_QOS_CLASS_SHIFT) & _PTHREAD_PRIORITY_QOS_CLASS_MASK) | - (((uint8_t)rel - 1) & _PTHREAD_PRIORITY_PRIORITY_MASK); - - return p; -} - -static inline qos_class_t -_pthread_priority_get_qos_newest(pthread_priority_t priority) -{ - qos_class_t qc; - switch ((priority & _PTHREAD_PRIORITY_QOS_CLASS_MASK) >> _PTHREAD_PRIORITY_QOS_CLASS_SHIFT) { - case __PTHREAD_PRIORITY_CBIT_USER_INTERACTIVE: qc = QOS_CLASS_USER_INTERACTIVE; break; - case __PTHREAD_PRIORITY_CBIT_USER_INITIATED: qc = QOS_CLASS_USER_INITIATED; break; - case __PTHREAD_PRIORITY_CBIT_DEFAULT: qc = QOS_CLASS_DEFAULT; break; - case __PTHREAD_PRIORITY_CBIT_UTILITY: qc = QOS_CLASS_UTILITY; break; - case __PTHREAD_PRIORITY_CBIT_BACKGROUND: qc = QOS_CLASS_BACKGROUND; break; - case __PTHREAD_PRIORITY_CBIT_MAINTENANCE: qc = QOS_CLASS_MAINTENANCE; break; - case __PTHREAD_PRIORITY_CBIT_UNSPECIFIED: - default: qc = QOS_CLASS_UNSPECIFIED; break; - } - return qc; -} - -#define _pthread_priority_get_relpri(priority) \ - ((int8_t)((priority & _PTHREAD_PRIORITY_PRIORITY_MASK) >> _PTHREAD_PRIORITY_PRIORITY_SHIFT) + 1) - -#define _pthread_priority_get_flags(priority) \ - (priority & 
_PTHREAD_PRIORITY_FLAGS_MASK) - -#define _pthread_priority_split_newest(priority, qos, relpri) \ - ({ qos = _pthread_priority_get_qos_newest(priority); \ - relpri = (qos == QOS_CLASS_UNSPECIFIED) ? 0 : \ - _pthread_priority_get_relpri(priority); \ - }) - -#define _PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL 0x1 -#define _PTHREAD_QOS_PARALLELISM_REALTIME 0x2 - /* userspace <-> kernel registration struct, for passing data to/from the kext during main thread init. */ struct _pthread_registration_data { /* @@ -177,9 +87,16 @@ struct _pthread_registration_data { uint32_t tsd_offset; /* copy-in */ uint32_t return_to_kernel_offset; /* copy-in */ uint32_t mach_thread_self_offset; /* copy-in */ + mach_vm_address_t stack_addr_hint; /* copy-out */ uint32_t mutex_default_policy; /* copy-out */ } __attribute__ ((packed)); +/* + * "error" flags returned by fail condvar syscalls + */ +#define ECVCLEARED 0x100 +#define ECVPREPOST 0x200 + #ifdef KERNEL /* The set of features, from the feature bits above, that we support. */ @@ -198,23 +115,16 @@ extern pthread_callbacks_t pthread_kern; struct ksyn_waitq_element { TAILQ_ENTRY(ksyn_waitq_element) kwe_list; /* link to other list members */ void * kwe_kwqqueue; /* queue blocked on */ - uint32_t kwe_state; /* state */ + thread_t kwe_thread; + uint16_t kwe_state; /* state */ + uint16_t kwe_flags; uint32_t kwe_lockseq; /* the sequence of the entry */ uint32_t kwe_count; /* upper bound on number of matches still pending */ uint32_t kwe_psynchretval; /* thread retval */ void *kwe_uth; /* uthread */ - uint64_t kwe_tid; /* tid of waiter */ }; typedef struct ksyn_waitq_element * ksyn_waitq_element_t; -pthread_priority_t thread_qos_get_pthread_priority(int qos) __attribute__((const)); -int thread_qos_get_class_index(int qos) __attribute__((const)); -int pthread_priority_get_thread_qos(pthread_priority_t priority) __attribute__((const)); -int pthread_priority_get_class_index(pthread_priority_t priority) __attribute__((const)); -pthread_priority_t class_index_get_pthread_priority(int index) __attribute__((const)); -int class_index_get_thread_qos(int index) __attribute__((const)); -int qos_class_get_class_index(int qos) __attribute__((const)); - #define PTH_DEFAULT_STACKSIZE 512*1024 #define MAX_PTHREAD_SIZE 64*1024 @@ -276,29 +186,24 @@ extern thread_call_t psynch_thcall; struct uthread* current_uthread(void); -#define WORKQ_REQTHREADS_THREADREQ 0x1 -#define WORKQ_REQTHREADS_NOEMERGENCY 0x2 - -// Call for the kernel's kevent system to request threads. A list of QoS/event -// counts should be provided, sorted by flags and then QoS class. If the -// identity of the thread to handle the request is known, it will be returned. -// If a new thread must be created, NULL will be returned. 
-thread_t _workq_reqthreads(struct proc *p, int requests_count, - workq_reqthreads_req_t requests); +int +workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr); -// Resolve a pthread_priority_t to a QoS/relative pri -integer_t _thread_qos_from_pthread_priority(unsigned long pri, unsigned long *flags); -// Clear out extraneous flags/pri info for putting in voucher -pthread_priority_t _pthread_priority_canonicalize(pthread_priority_t pri, boolean_t for_propagation); +int +workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr); -boolean_t _workq_thread_has_been_unbound(thread_t th, int qos_class); +void +workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr, + mach_port_name_t kport, int th_qos, int setup_flags, int upcall_flags); -int workq_kern_threadreq(struct proc *p, workq_threadreq_t req, - enum workq_threadreq_type, unsigned long priority, int flags); +int +workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map, + user_addr_t stackaddr, mach_port_name_t kport, + user_addr_t events, int nevents, int upcall_flags); -int workq_kern_threadreq_modify(struct proc *p, workq_threadreq_t req, - enum workq_threadreq_op operation, - unsigned long arg1, unsigned long arg2); +void +workq_markfree_threadstack(proc_t p, thread_t th, vm_map_t vmap, + user_addr_t stackaddr); #endif // KERNEL diff --git a/kern/kern_policy.c b/kern/kern_policy.c deleted file mode 100644 index 98e0c61..0000000 --- a/kern/kern_policy.c +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright (c) 2013 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include "kern_internal.h" -#include -#include - -pthread_priority_t -thread_qos_get_pthread_priority(int qos) -{ - /* Map the buckets we have in pthread_priority_t into a QoS tier. 
*/ - switch (qos) { - case THREAD_QOS_USER_INTERACTIVE: return _pthread_priority_make_newest(QOS_CLASS_USER_INTERACTIVE, 0, 0); - case THREAD_QOS_USER_INITIATED: return _pthread_priority_make_newest(QOS_CLASS_USER_INITIATED, 0, 0); - case THREAD_QOS_LEGACY: return _pthread_priority_make_newest(QOS_CLASS_DEFAULT, 0, 0); - case THREAD_QOS_UTILITY: return _pthread_priority_make_newest(QOS_CLASS_UTILITY, 0, 0); - case THREAD_QOS_BACKGROUND: return _pthread_priority_make_newest(QOS_CLASS_BACKGROUND, 0, 0); - case THREAD_QOS_MAINTENANCE: return _pthread_priority_make_newest(QOS_CLASS_MAINTENANCE, 0, 0); - default: return _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); - } -} - -int -thread_qos_get_class_index(int qos) -{ - switch (qos) { - case THREAD_QOS_USER_INTERACTIVE: return 0; - case THREAD_QOS_USER_INITIATED: return 1; - case THREAD_QOS_LEGACY: return 2; - case THREAD_QOS_UTILITY: return 3; - case THREAD_QOS_BACKGROUND: return 4; - case THREAD_QOS_MAINTENANCE: return 5; - default: return 2; - } -} - -int -pthread_priority_get_thread_qos(pthread_priority_t priority) -{ - /* Map the buckets we have in pthread_priority_t into a QoS tier. */ - switch (_pthread_priority_get_qos_newest(priority)) { - case QOS_CLASS_USER_INTERACTIVE: return THREAD_QOS_USER_INTERACTIVE; - case QOS_CLASS_USER_INITIATED: return THREAD_QOS_USER_INITIATED; - case QOS_CLASS_DEFAULT: return THREAD_QOS_LEGACY; - case QOS_CLASS_UTILITY: return THREAD_QOS_UTILITY; - case QOS_CLASS_BACKGROUND: return THREAD_QOS_BACKGROUND; - case QOS_CLASS_MAINTENANCE: return THREAD_QOS_MAINTENANCE; - default: return THREAD_QOS_UNSPECIFIED; - } -} - -int -pthread_priority_get_class_index(pthread_priority_t priority) -{ - return qos_class_get_class_index(_pthread_priority_get_qos_newest(priority)); -} - -pthread_priority_t -class_index_get_pthread_priority(int index) -{ - qos_class_t qos; - switch (index) { - case 0: qos = QOS_CLASS_USER_INTERACTIVE; break; - case 1: qos = QOS_CLASS_USER_INITIATED; break; - case 2: qos = QOS_CLASS_DEFAULT; break; - case 3: qos = QOS_CLASS_UTILITY; break; - case 4: qos = QOS_CLASS_BACKGROUND; break; - case 5: qos = QOS_CLASS_MAINTENANCE; break; - case 6: assert(index != 6); // EVENT_MANAGER should be handled specially - default: - /* Return the utility band if we don't understand the input. */ - qos = QOS_CLASS_UTILITY; - } - - pthread_priority_t priority; - priority = _pthread_priority_make_newest(qos, 0, 0); - - return priority; -} - -int -class_index_get_thread_qos(int class) -{ - int thread_qos; - switch (class) { - case 0: thread_qos = THREAD_QOS_USER_INTERACTIVE; break; - case 1: thread_qos = THREAD_QOS_USER_INITIATED; break; - case 2: thread_qos = THREAD_QOS_LEGACY; break; - case 3: thread_qos = THREAD_QOS_UTILITY; break; - case 4: thread_qos = THREAD_QOS_BACKGROUND; break; - case 5: thread_qos = THREAD_QOS_MAINTENANCE; break; - case 6: thread_qos = THREAD_QOS_LAST; break; - default: - thread_qos = THREAD_QOS_LAST; - } - return thread_qos; -} - -int -qos_class_get_class_index(int qos) -{ - switch (qos){ - case QOS_CLASS_USER_INTERACTIVE: return 0; - case QOS_CLASS_USER_INITIATED: return 1; - case QOS_CLASS_DEFAULT: return 2; - case QOS_CLASS_UTILITY: return 3; - case QOS_CLASS_BACKGROUND: return 4; - case QOS_CLASS_MAINTENANCE: return 5; - default: - /* Return the default band if we don't understand the input. 
*/ - return 2; - } -} - -/** - * Shims to help the kernel understand pthread_priority_t - */ - -integer_t -_thread_qos_from_pthread_priority(unsigned long priority, unsigned long *flags) -{ - if (flags != NULL){ - *flags = (int)_pthread_priority_get_flags(priority); - } - int thread_qos = pthread_priority_get_thread_qos(priority); - if (thread_qos == THREAD_QOS_UNSPECIFIED && flags != NULL){ - *flags |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - } - return thread_qos; -} - -pthread_priority_t -_pthread_priority_canonicalize(pthread_priority_t priority, boolean_t for_propagation) -{ - qos_class_t qos_class; - int relpri; - unsigned long flags = _pthread_priority_get_flags(priority); - _pthread_priority_split_newest(priority, qos_class, relpri); - - if (for_propagation) { - flags = 0; - if (relpri > 0 || relpri < -15) relpri = 0; - } else { - if (qos_class == QOS_CLASS_UNSPECIFIED) { - flags = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - } else if (flags & (_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG|_PTHREAD_PRIORITY_SCHED_PRI_FLAG)){ - flags = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - qos_class = QOS_CLASS_UNSPECIFIED; - } else { - flags &= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG|_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - } - - relpri = 0; - } - - return _pthread_priority_make_newest(qos_class, relpri, flags); -} diff --git a/kern/kern_support.c b/kern/kern_support.c index 280a18b..e424cce 100644 --- a/kern/kern_support.c +++ b/kern/kern_support.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2017 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,8 +32,8 @@ #pragma mark - Front Matter -#define _PTHREAD_CONDATTR_T -#define _PTHREAD_COND_T +#define _PTHREAD_CONDATTR_T +#define _PTHREAD_COND_T #define _PTHREAD_MUTEXATTR_T #define _PTHREAD_MUTEX_T #define _PTHREAD_RWLOCKATTR_T @@ -105,11 +105,11 @@ extern void panic(const char *string, ...) 
__printflike(1,2) __dead2; #include #include -#include #include "kern_internal.h" -// XXX: Dirty import for sys/signarvar.h that's wrapped in BSD_KERNEL_PRIVATE -#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP)) +#ifndef WQ_SETUP_EXIT_THREAD +#define WQ_SETUP_EXIT_THREAD 8 +#endif // XXX: Ditto for thread tags from kern/thread.h #define THREAD_TAG_MAINTHREAD 0x1 @@ -120,53 +120,13 @@ lck_grp_attr_t *pthread_lck_grp_attr; lck_grp_t *pthread_lck_grp; lck_attr_t *pthread_lck_attr; -zone_t pthread_zone_workqueue; -zone_t pthread_zone_threadlist; -zone_t pthread_zone_threadreq; - -extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64); -extern void workqueue_thread_yielded(void); - -#define WQ_SETUP_FIRST_USE 1 -#define WQ_SETUP_CLEAR_VOUCHER 2 -static void _setup_wqthread(proc_t p, thread_t th, struct workqueue *wq, - struct threadlist *tl, int flags); - -static void reset_priority(struct threadlist *tl, pthread_priority_t pri); -static pthread_priority_t pthread_priority_from_wq_class_index(struct workqueue *wq, int index); - -static void wq_unpark_continue(void* ptr, wait_result_t wait_result) __dead2; - -static bool workqueue_addnewthread(proc_t p, struct workqueue *wq); -static void workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use); -static void workqueue_lock_spin(struct workqueue *); -static void workqueue_unlock(struct workqueue *); - -#define WQ_RUN_TR_THROTTLED 0 -#define WQ_RUN_TR_THREAD_NEEDED 1 -#define WQ_RUN_TR_THREAD_STARTED 2 -#define WQ_RUN_TR_EXITING 3 -static int workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq, - struct threadlist *tl, struct threadreq *req, bool may_add_new_thread); - -static bool may_start_constrained_thread(struct workqueue *wq, - uint32_t at_priclass, struct threadlist *tl, bool may_start_timer); - -static mach_vm_offset_t stack_addr_hint(proc_t p, vm_map_t vmap); -static boolean_t wq_thread_is_busy(uint64_t cur_ts, - _Atomic uint64_t *lastblocked_tsp); - -int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc); -int proc_setalltargetconc(pid_t pid, int32_t * targetconcp); - -#define WQ_MAXPRI_MIN 0 /* low prio queue num */ -#define WQ_MAXPRI_MAX 2 /* max prio queuenum */ -#define WQ_PRI_NUM 3 /* number of prio work queues */ - #define C_32_STK_ALIGN 16 #define C_64_STK_ALIGN 16 #define C_64_REDZONE_LEN 128 +// WORKQ use the largest alignment any platform needs +#define C_WORKQ_STK_ALIGN 16 + #define PTHREAD_T_OFFSET 0 /* @@ -177,11 +137,12 @@ _________________________________________ ----------------------------------------- */ -#define PTHREAD_START_CUSTOM 0x01000000 +#define PTHREAD_START_CUSTOM 0x01000000 // #define PTHREAD_START_SETSCHED 0x02000000 -#define PTHREAD_START_DETACHED 0x04000000 +// was PTHREAD_START_DETACHED 0x04000000 #define PTHREAD_START_QOSCLASS 0x08000000 #define PTHREAD_START_TSD_BASE_SET 0x10000000 +#define PTHREAD_START_SUSPENDED 0x20000000 #define PTHREAD_START_QOSCLASS_MASK 0x00ffffff #define PTHREAD_START_POLICY_BITSHIFT 16 #define PTHREAD_START_POLICY_MASK 0xff @@ -193,199 +154,13 @@ _________________________________________ #define BASEPRI_DEFAULT 31 -#pragma mark sysctls - -static uint32_t wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS; -static uint32_t wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS; -static uint32_t wq_max_timer_interval_usecs = WQ_MAX_TIMER_INTERVAL_USECS; -static uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS; -static uint32_t wq_max_constrained_threads = WORKQUEUE_MAXTHREADS / 8; -static uint32_t 
wq_max_concurrency[WORKQUEUE_NUM_BUCKETS + 1]; // set to ncpus on load - -SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_stalled_window_usecs, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_reduce_pool_window_usecs, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_max_timer_interval_usecs, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_max_threads, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_max_constrained_threads, 0, ""); - -#ifdef DEBUG -static int wq_kevent_test SYSCTL_HANDLER_ARGS; -SYSCTL_PROC(_debug, OID_AUTO, wq_kevent_test, CTLFLAG_MASKED | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLTYPE_OPAQUE, NULL, 0, wq_kevent_test, 0, "-"); -#endif - -static uint32_t wq_init_constrained_limit = 1; - uint32_t pthread_debug_tracing = 1; -SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED, - &pthread_debug_tracing, 0, "") - static uint32_t pthread_mutex_default_policy; SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED, &pthread_mutex_default_policy, 0, ""); -/* - * +-----+-----+-----+-----+-----+-----+-----+ - * | MT | BG | UT | DE | IN | UN | mgr | - * +-----+-----+-----+-----+-----+-----+-----+-----+ - * | pri | 5 | 4 | 3 | 2 | 1 | 0 | 6 | - * | qos | 1 | 2 | 3 | 4 | 5 | 6 | 7 | - * +-----+-----+-----+-----+-----+-----+-----+-----+ - */ -static inline uint32_t -_wq_bucket_to_thread_qos(int pri) -{ - if (pri == WORKQUEUE_EVENT_MANAGER_BUCKET) { - return WORKQUEUE_EVENT_MANAGER_BUCKET + 1; - } - return WORKQUEUE_EVENT_MANAGER_BUCKET - pri; -} - -#pragma mark wq_thactive - -#if defined(__LP64__) -// Layout is: -// 7 * 16 bits for each QoS bucket request count (including manager) -// 3 bits of best QoS among all pending constrained requests -// 13 bits of zeroes -#define WQ_THACTIVE_BUCKET_WIDTH 16 -#define WQ_THACTIVE_QOS_SHIFT (7 * WQ_THACTIVE_BUCKET_WIDTH) -#else -// Layout is: -// 6 * 10 bits for each QoS bucket request count (except manager) -// 1 bit for the manager bucket -// 3 bits of best QoS among all pending constrained requests -#define WQ_THACTIVE_BUCKET_WIDTH 10 -#define WQ_THACTIVE_QOS_SHIFT (6 * WQ_THACTIVE_BUCKET_WIDTH + 1) -#endif -#define WQ_THACTIVE_BUCKET_MASK ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1) -#define WQ_THACTIVE_BUCKET_HALF (1U << (WQ_THACTIVE_BUCKET_WIDTH - 1)) -#define WQ_THACTIVE_NO_PENDING_REQUEST 6 - -_Static_assert(sizeof(wq_thactive_t) * CHAR_BIT - WQ_THACTIVE_QOS_SHIFT >= 3, - "Make sure we have space to encode a QoS"); - -static inline wq_thactive_t -_wq_thactive_fetch_and_add(struct workqueue *wq, wq_thactive_t offset) -{ -#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__) - return atomic_fetch_add_explicit(&wq->wq_thactive, offset, - memory_order_relaxed); -#else - return pthread_kern->atomic_fetch_add_128_relaxed(&wq->wq_thactive, offset); -#endif -} - -static inline wq_thactive_t -_wq_thactive(struct workqueue *wq) -{ -#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__) - return atomic_load_explicit(&wq->wq_thactive, memory_order_relaxed); -#else - return pthread_kern->atomic_load_128_relaxed(&wq->wq_thactive); -#endif -} - -#define WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(tha) \ - ((tha) >> WQ_THACTIVE_QOS_SHIFT) - -static inline uint32_t -_wq_thactive_best_constrained_req_qos(struct workqueue *wq) -{ - // Avoid expensive atomic 
operations: the three bits we're loading are in - // a single byte, and always updated under the workqueue lock - wq_thactive_t v = *(wq_thactive_t *)&wq->wq_thactive; - return WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(v); -} - -static inline wq_thactive_t -_wq_thactive_set_best_constrained_req_qos(struct workqueue *wq, - uint32_t orig_qos, uint32_t new_qos) -{ - wq_thactive_t v; - v = (wq_thactive_t)(new_qos - orig_qos) << WQ_THACTIVE_QOS_SHIFT; - /* - * We can do an atomic add relative to the initial load because updates - * to this qos are always serialized under the workqueue lock. - */ - return _wq_thactive_fetch_and_add(wq, v) + v; -} - -static inline wq_thactive_t -_wq_thactive_offset_for_qos(int qos) -{ - return (wq_thactive_t)1 << (qos * WQ_THACTIVE_BUCKET_WIDTH); -} - -static inline wq_thactive_t -_wq_thactive_inc(struct workqueue *wq, int qos) -{ - return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(qos)); -} - -static inline wq_thactive_t -_wq_thactive_dec(struct workqueue *wq, int qos) -{ - return _wq_thactive_fetch_and_add(wq, -_wq_thactive_offset_for_qos(qos)); -} - -static inline wq_thactive_t -_wq_thactive_move(struct workqueue *wq, int oldqos, int newqos) -{ - return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(newqos) - - _wq_thactive_offset_for_qos(oldqos)); -} - -static inline uint32_t -_wq_thactive_aggregate_downto_qos(struct workqueue *wq, wq_thactive_t v, - int qos, uint32_t *busycount, uint32_t *max_busycount) -{ - uint32_t count = 0, active; - uint64_t curtime; - -#ifndef __LP64__ - /* - * on 32bits the manager bucket is a single bit and the best constrained - * request QoS 3 bits are where the 10 bits of a regular QoS bucket count - * would be. Mask them out. - */ - v &= ~(~0ull << WQ_THACTIVE_QOS_SHIFT); -#endif - if (busycount) { - curtime = mach_absolute_time(); - *busycount = 0; - } - if (max_busycount) { - *max_busycount = qos + 1; - } - for (int i = 0; i <= qos; i++, v >>= WQ_THACTIVE_BUCKET_WIDTH) { - active = v & WQ_THACTIVE_BUCKET_MASK; - count += active; - if (busycount && wq->wq_thscheduled_count[i] > active) { - if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i])) { - /* - * We only consider the last blocked thread for a given bucket - * as busy because we don't want to take the list lock in each - * sched callback. However this is an approximation that could - * contribute to thread creation storms. - */ - (*busycount)++; - } - } - } - return count; -} - #pragma mark - Process/Thread Setup/Teardown syscalls static mach_vm_offset_t @@ -445,41 +220,45 @@ stack_addr_hint(proc_t p, vm_map_t vmap) return stackaddr; } +static bool +_pthread_priority_to_policy(pthread_priority_t priority, + thread_qos_policy_data_t *data) +{ + data->qos_tier = _pthread_priority_thread_qos(priority); + data->tier_importance = _pthread_priority_relpri(priority); + if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 || + data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) { + return false; + } + return true; +} + /** * bsdthread_create system call. Used by pthread_create. 
*/ int -_bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval) +_bsdthread_create(struct proc *p, + __unused user_addr_t user_func, __unused user_addr_t user_funcarg, + user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, + user_addr_t *retval) { kern_return_t kret; void * sright; int error = 0; - int allocated = 0; - mach_vm_offset_t stackaddr; - mach_vm_size_t th_allocsize = 0; - mach_vm_size_t th_guardsize; - mach_vm_offset_t th_stack; - mach_vm_offset_t th_pthread; mach_vm_offset_t th_tsd_base; mach_port_name_t th_thport; thread_t th; - vm_map_t vmap = pthread_kern->current_map(); task_t ctask = current_task(); unsigned int policy, importance; uint32_t tsd_offset; - - int isLP64 = 0; + bool start_suspended = (flags & PTHREAD_START_SUSPENDED); if (pthread_kern->proc_get_register(p) == 0) { return EINVAL; } - PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0); - - isLP64 = proc_is64bit(p); - th_guardsize = vm_map_page_size(vmap); + PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0); - stackaddr = pthread_kern->proc_get_stack_addr_hint(p); kret = pthread_kern->thread_create(ctask, &th); if (kret != KERN_SUCCESS) return(ENOMEM); @@ -495,152 +274,64 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar } if ((flags & PTHREAD_START_CUSTOM) == 0) { - mach_vm_size_t pthread_size = - vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(vmap)); - th_allocsize = th_guardsize + user_stack + pthread_size; - user_stack += PTHREAD_T_OFFSET; - - kret = mach_vm_map(vmap, &stackaddr, - th_allocsize, - page_size-1, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL, - 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, - VM_INHERIT_DEFAULT); - if (kret != KERN_SUCCESS){ - kret = mach_vm_allocate(vmap, - &stackaddr, th_allocsize, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE); - } - if (kret != KERN_SUCCESS) { - error = ENOMEM; - goto out; - } - - PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0); - - allocated = 1; - /* - * The guard page is at the lowest address - * The stack base is the highest address - */ - kret = mach_vm_protect(vmap, stackaddr, th_guardsize, FALSE, VM_PROT_NONE); - - if (kret != KERN_SUCCESS) { - error = ENOMEM; - goto out1; - } - - th_pthread = stackaddr + th_guardsize + user_stack; - th_stack = th_pthread; - - /* - * Pre-fault the first page of the new thread's stack and the page that will - * contain the pthread_t structure. 
- */ - if (vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) != - vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap))){ - vm_fault( vmap, - vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); - } - - vm_fault( vmap, - vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap)), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); - - } else { - th_stack = user_stack; - th_pthread = user_pthread; - - PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0); + error = EINVAL; + goto out; } + PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3); + tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p); if (tsd_offset) { - th_tsd_base = th_pthread + tsd_offset; + th_tsd_base = user_pthread + tsd_offset; kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base); if (kret == KERN_SUCCESS) { flags |= PTHREAD_START_TSD_BASE_SET; } } + /* + * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel + * supports this flag (after the fact). + */ + flags &= ~PTHREAD_START_SUSPENDED; -#if defined(__i386__) || defined(__x86_64__) /* - * Set up i386 registers & function call. + * Set up registers & function call. */ - if (isLP64 == 0) { - x86_thread_state32_t state = { - .eip = (unsigned int)pthread_kern->proc_get_threadstart(p), - .eax = (unsigned int)th_pthread, - .ebx = (unsigned int)th_thport, - .ecx = (unsigned int)user_func, - .edx = (unsigned int)user_funcarg, - .edi = (unsigned int)user_stack, - .esi = (unsigned int)flags, - /* - * set stack pointer - */ - .esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN)) +#if defined(__i386__) || defined(__x86_64__) + if (proc_is64bit_data(p)) { + x86_thread_state64_t state = { + .rip = (uint64_t)pthread_kern->proc_get_threadstart(p), + .rdi = (uint64_t)user_pthread, + .rsi = (uint64_t)th_thport, + .rdx = (uint64_t)user_func, /* golang wants this */ + .rcx = (uint64_t)user_funcarg, /* golang wants this */ + .r8 = (uint64_t)user_stack, /* golang wants this */ + .r9 = (uint64_t)flags, + + .rsp = (uint64_t)(user_stack - C_64_REDZONE_LEN) }; - error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); - if (error != KERN_SUCCESS) { - error = EINVAL; - goto out; - } + (void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state); } else { - x86_thread_state64_t state64 = { - .rip = (uint64_t)pthread_kern->proc_get_threadstart(p), - .rdi = (uint64_t)th_pthread, - .rsi = (uint64_t)(th_thport), - .rdx = (uint64_t)user_func, - .rcx = (uint64_t)user_funcarg, - .r8 = (uint64_t)user_stack, - .r9 = (uint64_t)flags, - /* - * set stack pointer aligned to 16 byte boundary - */ - .rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN) + x86_thread_state32_t state = { + .eip = (uint32_t)pthread_kern->proc_get_threadstart(p), + .eax = (uint32_t)user_pthread, + .ebx = (uint32_t)th_thport, + .ecx = (uint32_t)user_func, /* golang wants this */ + .edx = (uint32_t)user_funcarg, /* golang wants this */ + .edi = (uint32_t)user_stack, /* golang wants this */ + .esi = (uint32_t)flags, + + .esp = (int)((vm_offset_t)(user_stack - C_32_STK_ALIGN)) }; - error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64); - if (error != KERN_SUCCESS) { - error = EINVAL; - goto out; - } - + (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); } -#elif defined(__arm__) - arm_thread_state_t state = { - .pc = 
(int)pthread_kern->proc_get_threadstart(p), - .r[0] = (unsigned int)th_pthread, - .r[1] = (unsigned int)th_thport, - .r[2] = (unsigned int)user_func, - .r[3] = (unsigned int)user_funcarg, - .r[4] = (unsigned int)user_stack, - .r[5] = (unsigned int)flags, - - /* Set r7 & lr to 0 for better back tracing */ - .r[7] = 0, - .lr = 0, - - /* - * set stack pointer - */ - .sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN)) - }; - - (void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); - #else #error bsdthread_create not defined for this architecture #endif - if ((flags & PTHREAD_START_SETSCHED) != 0) { + if (flags & PTHREAD_START_SETSCHED) { /* Set scheduling parameters if needed */ thread_extended_policy_data_t extinfo; thread_precedence_policy_data_t precedinfo; @@ -658,16 +349,16 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar precedinfo.importance = (importance - BASEPRI_DEFAULT); thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT); - } else if ((flags & PTHREAD_START_QOSCLASS) != 0) { + } else if (flags & PTHREAD_START_QOSCLASS) { /* Set thread QoS class if requested. */ - pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK); - thread_qos_policy_data_t qos; - qos.qos_tier = pthread_priority_get_thread_qos(priority); - qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : - _pthread_priority_get_relpri(priority); - pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT); + if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) { + error = EINVAL; + goto out; + } + pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, + (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT); } if (pthread_kern->proc_get_mach_thread_self_tsd_offset) { @@ -677,37 +368,33 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar bool proc64bit = proc_is64bit(p); if (proc64bit) { uint64_t th_thport_tsd = (uint64_t)th_thport; - error = copyout(&th_thport_tsd, th_pthread + tsd_offset + + error = copyout(&th_thport_tsd, user_pthread + tsd_offset + mach_thread_self_offset, sizeof(th_thport_tsd)); } else { uint32_t th_thport_tsd = (uint32_t)th_thport; - error = copyout(&th_thport_tsd, th_pthread + tsd_offset + + error = copyout(&th_thport_tsd, user_pthread + tsd_offset + mach_thread_self_offset, sizeof(th_thport_tsd)); } if (error) { - goto out1; + goto out; } } } - kret = pthread_kern->thread_resume(th); - if (kret != KERN_SUCCESS) { - error = EINVAL; - goto out1; + if (!start_suspended) { + kret = pthread_kern->thread_resume(th); + if (kret != KERN_SUCCESS) { + error = EINVAL; + goto out; + } } thread_deallocate(th); /* drop the creator reference */ - PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0); - - // cast required as mach_vm_offset_t is always 64 bits even on 32-bit platforms - *retval = (user_addr_t)th_pthread; + PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_END, error, user_pthread, 0, 0); + *retval = user_pthread; return(0); -out1: - if (allocated != 0) { - (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize); - } out: (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport); if (pthread_kern->thread_will_park_or_terminate) { @@ -737,21 +424,24 @@ _bsdthread_terminate(__unused struct proc *p, freeaddr = (mach_vm_offset_t)stackaddr; freesize = size; - 
PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0); + PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff); if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) { if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){ vm_map_t user_map = pthread_kern->current_map(); freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map)); kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE); - assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS); +#if MACH_ASSERT + if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) { + os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret); + } +#endif kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE); assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS); } else { kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize); if (kret != KERN_SUCCESS) { - PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0); - return(EINVAL); + PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0); } } } @@ -761,10 +451,9 @@ _bsdthread_terminate(__unused struct proc *p, } (void)thread_terminate(th); if (sem != MACH_PORT_NULL) { - kret = pthread_kern->semaphore_signal_internal_trap(sem); + kret = pthread_kern->semaphore_signal_internal_trap(sem); if (kret != KERN_SUCCESS) { - PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0); - return(EINVAL); + PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0); } } @@ -772,14 +461,10 @@ _bsdthread_terminate(__unused struct proc *p, pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport); } - PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0); + PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0); pthread_kern->thread_exception_return(); - panic("bsdthread_terminate: still running\n"); - - PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0); - - return(0); + __builtin_unreachable(); } /** @@ -873,29 +558,35 @@ _bsdthread_register(struct proc *p, if (pthread_init_data != 0) { /* Outgoing data that userspace expects as a reply */ data.version = sizeof(struct _pthread_registration_data); + data.main_qos = _pthread_unspecified_priority(); + if (pthread_kern->qos_main_thread_active()) { mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT; thread_qos_policy_data_t qos; boolean_t gd = FALSE; - kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd); + kr = pthread_kern->thread_policy_get(current_thread(), + THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd); if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) { - /* Unspecified threads means the kernel wants us to impose legacy upon the thread. */ + /* + * Unspecified threads means the kernel wants us + * to impose legacy upon the thread. 
+ */ qos.qos_tier = THREAD_QOS_LEGACY; qos.tier_importance = 0; - kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT); + kr = pthread_kern->thread_policy_set_internal(current_thread(), + THREAD_QOS_POLICY, (thread_policy_t)&qos, + THREAD_QOS_POLICY_COUNT); } if (kr == KERN_SUCCESS) { - data.main_qos = thread_qos_get_pthread_priority(qos.qos_tier); - } else { - data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); + data.main_qos = _pthread_priority_make_from_thread_qos( + qos.qos_tier, 0, 0); } - } else { - data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); } + data.stack_addr_hint = stackaddr; data.mutex_default_policy = pthread_mutex_default_policy; kr = copyout(&data, pthread_init_data, pthread_init_sz); @@ -910,2858 +601,220 @@ _bsdthread_register(struct proc *p, return(0); } -#pragma mark - QoS Manipulation + +#pragma mark - Workqueue Thread Support + +static mach_vm_size_t +workq_thread_allocsize(proc_t p, vm_map_t wq_map, + mach_vm_size_t *guardsize_out) +{ + mach_vm_size_t guardsize = vm_map_page_size(wq_map); + mach_vm_size_t pthread_size = vm_map_round_page_mask( + pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, + vm_map_page_mask(wq_map)); + if (guardsize_out) *guardsize_out = guardsize; + return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size; +} int -_bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval) +workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr) { - int rv; - thread_t th; + mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p); + mach_vm_size_t guardsize, th_allocsize; + kern_return_t kret; - pthread_priority_t priority; + th_allocsize = workq_thread_allocsize(p, vmap, &guardsize); + kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1, + VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE, + VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); - /* Unused parameters must be zero. */ - if (arg3 != 0) { - return EINVAL; + if (kret != KERN_SUCCESS) { + kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize, + VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE); } - /* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */ - if (proc_is64bit(p)) { - uint64_t v; - rv = copyin(tsd_priority_addr, &v, sizeof(v)); - if (rv) goto out; - priority = (int)(v & 0xffffffff); - } else { - uint32_t v; - rv = copyin(tsd_priority_addr, &v, sizeof(v)); - if (rv) goto out; - priority = v; + if (kret != KERN_SUCCESS) { + goto fail; } - if ((th = port_name_to_thread(kport)) == THREAD_NULL) { - return ESRCH; + /* + * The guard page is at the lowest address + * The stack base is the highest address + */ + kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE); + if (kret != KERN_SUCCESS) { + goto fail_vm_deallocate; } - /* Disable pthread_set_qos_class_np() on threads other than pthread_self */ - if (th != current_thread()) { - thread_deallocate(th); - return EPERM; + if (out_addr) { + *out_addr = stackaddr; } + return 0; - rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval); - - /* Static param the thread, we just set QoS on it, so its stuck in QoS land now. 
*/ - /* pthread_kern->thread_static_param(th, TRUE); */ // see , for details - - thread_deallocate(th); - -out: - return rv; +fail_vm_deallocate: + (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize); +fail: + return kret; } -static inline struct threadlist * -util_get_thread_threadlist_entry(thread_t th) +int +workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr) { - struct uthread *uth = pthread_kern->get_bsdthread_info(th); - if (uth) { - struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); - return tl; - } - return NULL; + return mach_vm_deallocate(vmap, stackaddr, + workq_thread_allocsize(p, vmap, NULL)); } -boolean_t -_workq_thread_has_been_unbound(thread_t th, int qos_class) -{ - struct threadlist *tl = util_get_thread_threadlist_entry(th); - if (!tl) { - return FALSE; +void +workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th, + vm_map_t vmap, user_addr_t stackaddr) +{ + // Keep this in sync with workq_setup_thread() + const vm_size_t guardsize = vm_map_page_size(vmap); + const user_addr_t freeaddr = (user_addr_t)stackaddr + guardsize; + const vm_map_offset_t freesize = vm_map_trunc_page_mask( + (PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1, + vm_map_page_mask(vmap)) - guardsize; + + __assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr, + freesize, VM_BEHAVIOR_REUSABLE); +#if MACH_ASSERT + if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) { + os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr); } +#endif +} - struct workqueue *wq = tl->th_workq; - workqueue_lock_spin(wq); - - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { - goto failure; - } else if (qos_class != class_index_get_thread_qos(tl->th_priority)) { - goto failure; - } +struct workq_thread_addrs { + user_addr_t self; + user_addr_t stack_bottom; + user_addr_t stack_top; +}; - if ((tl->th_flags & TH_LIST_KEVENT_BOUND)){ - goto failure; - } - tl->th_flags &= ~TH_LIST_KEVENT_BOUND; +static inline void +workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr) +{ + th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN); +} - workqueue_unlock(wq); - return TRUE; +static void +workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr, + struct workq_thread_addrs *th_addrs) +{ + const vm_size_t guardsize = vm_map_page_size(map); -failure: - workqueue_unlock(wq); - return FALSE; + th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE + + guardsize + PTHREAD_T_OFFSET); + workq_thread_set_top_addr(th_addrs, th_addrs->self); + th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize); } -int -_bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval) +static inline void +workq_set_register_state(proc_t p, thread_t th, + struct workq_thread_addrs *addrs, mach_port_name_t kport, + user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count) { - thread_qos_policy_data_t qos; - mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT; - boolean_t gd = FALSE; - thread_t th = current_thread(); - struct workqueue *wq = NULL; - struct threadlist *tl = NULL; + user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p); + if (!wqstart_fnptr) { + panic("workqueue thread start function pointer is NULL"); + } - kern_return_t kr; - int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0; +#if defined(__i386__) || defined(__x86_64__) + if (proc_is64bit_data(p) == 0) { + 
x86_thread_state32_t state = { + .eip = (unsigned int)wqstart_fnptr, + .eax = /* arg0 */ (unsigned int)addrs->self, + .ebx = /* arg1 */ (unsigned int)kport, + .ecx = /* arg2 */ (unsigned int)addrs->stack_bottom, + .edx = /* arg3 */ (unsigned int)kevent_list, + .edi = /* arg4 */ (unsigned int)upcall_flags, + .esi = /* arg5 */ (unsigned int)kevent_count, - if ((flags & _PTHREAD_SET_SELF_WQ_KEVENT_UNBIND) != 0) { - tl = util_get_thread_threadlist_entry(th); - if (tl) { - wq = tl->th_workq; - } else { - goto qos; + .esp = (int)((vm_offset_t)addrs->stack_top), + }; + + int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); + if (error != KERN_SUCCESS) { + panic(__func__ ": thread_set_wq_state failed: %d", error); } + } else { + x86_thread_state64_t state64 = { + // x86-64 already passes all the arguments in registers, so we just put them in their final place here + .rip = (uint64_t)wqstart_fnptr, + .rdi = (uint64_t)addrs->self, + .rsi = (uint64_t)kport, + .rdx = (uint64_t)addrs->stack_bottom, + .rcx = (uint64_t)kevent_list, + .r8 = (uint64_t)upcall_flags, + .r9 = (uint64_t)kevent_count, - workqueue_lock_spin(wq); - if (tl->th_flags & TH_LIST_KEVENT_BOUND) { - tl->th_flags &= ~TH_LIST_KEVENT_BOUND; - unsigned int kevent_flags = KEVENT_FLAG_WORKQ | KEVENT_FLAG_UNBIND_CHECK_FLAGS; - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { - kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER; - } + .rsp = (uint64_t)(addrs->stack_top) + }; - workqueue_unlock(wq); - __assert_only int ret = kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags); - assert(ret == 0); - } else { - workqueue_unlock(wq); + int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64); + if (error != KERN_SUCCESS) { + panic(__func__ ": thread_set_wq_state failed: %d", error); } } +#else +#error setup_wqthread not defined for this architecture +#endif +} -qos: - if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) { - kr = pthread_kern->thread_policy_get(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd); - if (kr != KERN_SUCCESS) { - qos_rv = EINVAL; - goto voucher; - } +static int +workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs, int upcall_flags, + user_addr_t eventlist, int nevents, int kevent_flags, + user_addr_t *kevent_list_out, int *kevent_count_out) +{ + bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP; + int kevent_count = WQ_KEVENT_LIST_LEN; + user_addr_t kevent_list = th_addrs->self - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s); + user_addr_t kevent_id_addr = kevent_list; + kqueue_id_t kevent_id = -1; + int ret; + if (workloop) { /* - * If we have main-thread QoS then we don't allow a thread to come out - * of QOS_CLASS_UNSPECIFIED. + * The kevent ID goes just below the kevent list. Sufficiently new + * userspace will know to look there. Old userspace will just + * ignore it. 
*/ - if (pthread_kern->qos_main_thread_active() && qos.qos_tier == - THREAD_QOS_UNSPECIFIED) { - qos_rv = EPERM; - goto voucher; - } + kevent_id_addr -= sizeof(kqueue_id_t); + } - if (!tl) { - tl = util_get_thread_threadlist_entry(th); - if (tl) wq = tl->th_workq; - } + user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE; + user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE; - PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0); + if (workloop) { + kevent_flags |= KEVENT_FLAG_WORKLOOP; + ret = kevent_id_internal(p, &kevent_id, + eventlist, nevents, kevent_list, kevent_count, + kevent_data_buf, &kevent_data_available, + kevent_flags, &kevent_count); + copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id)); + } else { + kevent_flags |= KEVENT_FLAG_WORKQ; + ret = kevent_qos_internal(p, -1, eventlist, nevents, kevent_list, + kevent_count, kevent_data_buf, &kevent_data_available, + kevent_flags, &kevent_count); + } - qos.qos_tier = pthread_priority_get_thread_qos(priority); - qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority); + // squash any errors into just empty output + if (ret != 0 || kevent_count == -1) { + *kevent_list_out = NULL; + *kevent_count_out = 0; + return ret; + } - if (qos.qos_tier == QOS_CLASS_UNSPECIFIED || - qos.tier_importance > 0 || qos.tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) { - qos_rv = EINVAL; - goto voucher; - } - - /* - * If we're a workqueue, the threadlist item priority needs adjusting, - * along with the bucket we were running in. - */ - if (tl) { - bool try_run_threadreq = false; - - workqueue_lock_spin(wq); - kr = pthread_kern->thread_set_workq_qos(th, qos.qos_tier, qos.tier_importance); - assert(kr == KERN_SUCCESS || kr == KERN_TERMINATED); - - /* Fix up counters. */ - uint8_t old_bucket = tl->th_priority; - uint8_t new_bucket = pthread_priority_get_class_index(priority); - - if (old_bucket != new_bucket) { - _wq_thactive_move(wq, old_bucket, new_bucket); - wq->wq_thscheduled_count[old_bucket]--; - wq->wq_thscheduled_count[new_bucket]++; - if (old_bucket == WORKQUEUE_EVENT_MANAGER_BUCKET || - old_bucket < new_bucket) { - /* - * if the QoS of the thread was lowered, then this could - * allow for a higher QoS thread request to run, so we need - * to reevaluate. 
- */ - try_run_threadreq = true; - } - tl->th_priority = new_bucket; - } - - bool old_overcommit = !(tl->th_flags & TH_LIST_CONSTRAINED); - bool new_overcommit = priority & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; - if (!old_overcommit && new_overcommit) { - if (wq->wq_constrained_threads_scheduled-- == - wq_max_constrained_threads) { - try_run_threadreq = true; - } - tl->th_flags &= ~TH_LIST_CONSTRAINED; - } else if (old_overcommit && !new_overcommit) { - wq->wq_constrained_threads_scheduled++; - tl->th_flags |= TH_LIST_CONSTRAINED; - } - - if (try_run_threadreq) { - workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true); - } else { - workqueue_unlock(wq); - } - } else { - kr = pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT); - if (kr != KERN_SUCCESS) { - qos_rv = EINVAL; - } - } - - PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0); - } - -voucher: - if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) { - kr = pthread_kern->thread_set_voucher_name(voucher); - if (kr != KERN_SUCCESS) { - voucher_rv = ENOENT; - goto fixedpri; - } - } - -fixedpri: - if (qos_rv) goto done; - if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) { - thread_extended_policy_data_t extpol = {.timeshare = 0}; - - if (!tl) tl = util_get_thread_threadlist_entry(th); - if (tl) { - /* Not allowed on workqueue threads */ - fixedpri_rv = ENOTSUP; - goto done; - } - - kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT); - if (kr != KERN_SUCCESS) { - fixedpri_rv = EINVAL; - goto done; - } - } else if ((flags & _PTHREAD_SET_SELF_TIMESHARE_FLAG) != 0) { - thread_extended_policy_data_t extpol = {.timeshare = 1}; - - if (!tl) tl = util_get_thread_threadlist_entry(th); - if (tl) { - /* Not allowed on workqueue threads */ - fixedpri_rv = ENOTSUP; - goto done; - } - - kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT); - if (kr != KERN_SUCCESS) { - fixedpri_rv = EINVAL; - goto done; - } - } - -done: - if (qos_rv && voucher_rv) { - /* Both failed, give that a unique error. */ - return EBADMSG; - } - - if (qos_rv) { - return qos_rv; - } - - if (voucher_rv) { - return voucher_rv; - } - - if (fixedpri_rv) { - return fixedpri_rv; - } - - return 0; -} - -int -_bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval) -{ - thread_t th; - int rv = 0; - - if ((th = port_name_to_thread(kport)) == THREAD_NULL) { - return ESRCH; - } - - int override_qos = pthread_priority_get_thread_qos(priority); - - struct threadlist *tl = util_get_thread_threadlist_entry(th); - if (tl) { - PTHREAD_TRACE_WQ(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0); - } - - /* The only failure case here is if we pass a tid and have it lookup the thread, we pass the uthread, so this all always succeeds. 
*/ - pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE, - resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE, USER_ADDR_NULL, MACH_PORT_NULL); - thread_deallocate(th); - return rv; -} - -int -_bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t resource, user_addr_t arg3, int __unused *retval) -{ - thread_t th; - int rv = 0; - - if (arg3 != 0) { - return EINVAL; - } - - if ((th = port_name_to_thread(kport)) == THREAD_NULL) { - return ESRCH; - } - - struct uthread *uth = pthread_kern->get_bsdthread_info(th); - - struct threadlist *tl = util_get_thread_threadlist_entry(th); - if (tl) { - PTHREAD_TRACE_WQ(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 0, 0, 0); - } - - pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE); - - thread_deallocate(th); - return rv; -} - -static int -_bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, user_addr_t ulock_addr) -{ - thread_t th; - int rv = 0; - - if ((th = port_name_to_thread(kport)) == THREAD_NULL) { - return ESRCH; - } - - int override_qos = pthread_priority_get_thread_qos(priority); - - struct threadlist *tl = util_get_thread_threadlist_entry(th); - if (!tl) { - thread_deallocate(th); - return EPERM; - } - - PTHREAD_TRACE_WQ(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0); - - rv = pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE, - resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE, ulock_addr, kport); - - thread_deallocate(th); - return rv; -} - -int _bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused *p, user_addr_t __unused cmd, - mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval) -{ - return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, resource, USER_ADDR_NULL); -} - -int -_bsdthread_ctl_qos_override_dispatch(struct proc *p __unused, user_addr_t cmd __unused, mach_port_name_t kport, pthread_priority_t priority, user_addr_t ulock_addr, int __unused *retval) -{ - return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, USER_ADDR_NULL, ulock_addr); -} - -int -_bsdthread_ctl_qos_override_reset(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval) -{ - if (arg1 != 0 || arg2 != 0 || arg3 != 0) { - return EINVAL; - } - - return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, 1 /* reset_all */, 0, 0, retval); -} - -int -_bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused *p, user_addr_t __unused cmd, int reset_all, user_addr_t resource, user_addr_t arg3, int __unused *retval) -{ - if ((reset_all && (resource != 0)) || arg3 != 0) { - return EINVAL; - } - - thread_t th = current_thread(); - struct uthread *uth = pthread_kern->get_bsdthread_info(th); - struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); - - if (!tl) { - return EPERM; - } - - PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, 0, 0, 0, 0); - - resource = reset_all ? 
THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD : resource; - pthread_kern->proc_usynch_thread_qos_reset_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE); - - return 0; -} - -static int -_bsdthread_ctl_max_parallelism(struct proc __unused *p, user_addr_t __unused cmd, - int qos, unsigned long flags, int *retval) -{ - _Static_assert(QOS_PARALLELISM_COUNT_LOGICAL == - _PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL, "logical"); - _Static_assert(QOS_PARALLELISM_REALTIME == - _PTHREAD_QOS_PARALLELISM_REALTIME, "realtime"); - - if (flags & ~(QOS_PARALLELISM_REALTIME | QOS_PARALLELISM_COUNT_LOGICAL)) { - return EINVAL; - } - - if (flags & QOS_PARALLELISM_REALTIME) { - if (qos) { - return EINVAL; - } - } else if (qos == THREAD_QOS_UNSPECIFIED || qos >= THREAD_QOS_LAST) { - return EINVAL; - } - - *retval = pthread_kern->qos_max_parallelism(qos, flags); - return 0; -} - -int -_bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval) -{ - switch (cmd) { - case BSDTHREAD_CTL_SET_QOS: - return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval); - case BSDTHREAD_CTL_QOS_OVERRIDE_START: - return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval); - case BSDTHREAD_CTL_QOS_OVERRIDE_END: - return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval); - case BSDTHREAD_CTL_QOS_OVERRIDE_RESET: - return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval); - case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH: - return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval); - case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD: - return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval); - case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET: - return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, (int)arg1, arg2, arg3, retval); - case BSDTHREAD_CTL_SET_SELF: - return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval); - case BSDTHREAD_CTL_QOS_MAX_PARALLELISM: - return _bsdthread_ctl_max_parallelism(p, cmd, (int)arg1, (unsigned long)arg2, retval); - default: - return EINVAL; - } -} - -#pragma mark - Workqueue Implementation - -#pragma mark wq_flags - -static inline uint32_t -_wq_flags(struct workqueue *wq) -{ - return atomic_load_explicit(&wq->wq_flags, memory_order_relaxed); -} - -static inline bool -_wq_exiting(struct workqueue *wq) -{ - return _wq_flags(wq) & WQ_EXITING; -} - -static inline uint32_t -_wq_flags_or_orig(struct workqueue *wq, uint32_t v) -{ -#if PTHREAD_INLINE_RMW_ATOMICS - uint32_t state; - do { - state = _wq_flags(wq); - } while (!OSCompareAndSwap(state, state | v, &wq->wq_flags)); - return state; -#else - return atomic_fetch_or_explicit(&wq->wq_flags, v, memory_order_relaxed); -#endif -} - -static inline uint32_t -_wq_flags_and_orig(struct workqueue *wq, uint32_t v) -{ -#if PTHREAD_INLINE_RMW_ATOMICS - uint32_t state; - do { - state = _wq_flags(wq); - } while (!OSCompareAndSwap(state, state & v, &wq->wq_flags)); - return state; -#else - return atomic_fetch_and_explicit(&wq->wq_flags, v, memory_order_relaxed); -#endif -} - -static inline bool -WQ_TIMER_DELAYED_NEEDED(struct workqueue *wq) -{ - uint32_t oldflags, newflags; - do { - oldflags = _wq_flags(wq); - 
if (oldflags & (WQ_EXITING | WQ_ATIMER_DELAYED_RUNNING)) { - return false; - } - newflags = oldflags | WQ_ATIMER_DELAYED_RUNNING; - } while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags)); - return true; -} - -static inline bool -WQ_TIMER_IMMEDIATE_NEEDED(struct workqueue *wq) -{ - uint32_t oldflags, newflags; - do { - oldflags = _wq_flags(wq); - if (oldflags & (WQ_EXITING | WQ_ATIMER_IMMEDIATE_RUNNING)) { - return false; - } - newflags = oldflags | WQ_ATIMER_IMMEDIATE_RUNNING; - } while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags)); - return true; -} - -#pragma mark thread requests pacing - -static inline uint32_t -_wq_pacing_shift_for_pri(int pri) -{ - return _wq_bucket_to_thread_qos(pri) - 1; -} - -static inline int -_wq_highest_paced_priority(struct workqueue *wq) -{ - uint8_t paced = wq->wq_paced; - int msb = paced ? 32 - __builtin_clz(paced) : 0; // fls(paced) == bit + 1 - return WORKQUEUE_EVENT_MANAGER_BUCKET - msb; -} - -static inline uint8_t -_wq_pacing_bit_for_pri(int pri) -{ - return 1u << _wq_pacing_shift_for_pri(pri); -} - -static inline bool -_wq_should_pace_priority(struct workqueue *wq, int pri) -{ - return wq->wq_paced >= _wq_pacing_bit_for_pri(pri); -} - -static inline void -_wq_pacing_start(struct workqueue *wq, struct threadlist *tl) -{ - uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority); - assert((tl->th_flags & TH_LIST_PACING) == 0); - assert((wq->wq_paced & bit) == 0); - wq->wq_paced |= bit; - tl->th_flags |= TH_LIST_PACING; -} - -static inline bool -_wq_pacing_end(struct workqueue *wq, struct threadlist *tl) -{ - if (tl->th_flags & TH_LIST_PACING) { - uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority); - assert((wq->wq_paced & bit) != 0); - wq->wq_paced ^= bit; - tl->th_flags &= ~TH_LIST_PACING; - return wq->wq_paced < bit; // !_wq_should_pace_priority - } - return false; -} - -#pragma mark thread requests - -static void -_threadreq_init_alloced(struct threadreq *req, int priority, int flags) -{ - assert((flags & TR_FLAG_ONSTACK) == 0); - req->tr_state = TR_STATE_NEW; - req->tr_priority = priority; - req->tr_flags = flags; -} - -static void -_threadreq_init_stack(struct threadreq *req, int priority, int flags) -{ - req->tr_state = TR_STATE_NEW; - req->tr_priority = priority; - req->tr_flags = flags | TR_FLAG_ONSTACK; -} - -static void -_threadreq_copy_prepare(struct workqueue *wq) -{ -again: - if (wq->wq_cached_threadreq) { - return; - } - - workqueue_unlock(wq); - struct threadreq *req = zalloc(pthread_zone_threadreq); - workqueue_lock_spin(wq); - - if (wq->wq_cached_threadreq) { - /* - * We lost the race and someone left behind an extra threadreq for us - * to use. Throw away our request and retry. 
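
Referring back to the pacing helpers earlier in this hunk (_wq_pacing_bit_for_pri, _wq_should_pace_priority, _wq_highest_paced_priority): wq_paced packs one bit per QoS tier, and the "paced >= bit" comparison is a compact way of asking whether any bit at that shift or above is set. A minimal user-space model of just the bit math, with the tier numbering assumed for illustration:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* One bit per QoS tier, higher shift == higher QoS.  A priority is paced
 * when a thread at the same or a higher tier is still in its pacing
 * window, which the kernel expresses as (paced >= bit) instead of
 * scanning individual bits. */
static bool should_pace(uint8_t paced, unsigned shift)
{
	return paced >= (uint8_t)(1u << shift);
}

int main(void)
{
	uint8_t paced = 0;

	paced |= 1u << 3;               /* a thread at tier 3 starts pacing */
	assert(should_pace(paced, 3));  /* same tier: throttled             */
	assert(should_pace(paced, 1));  /* lower tier: throttled too        */
	assert(!should_pace(paced, 4)); /* higher tier: unaffected          */

	/* fls(paced) == 32 - clz(paced), i.e. index of the highest paced tier + 1 */
	int msb = paced ? 32 - __builtin_clz(paced) : 0;
	assert(msb == 4);
	return 0;
}
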
- */ - workqueue_unlock(wq); - zfree(pthread_zone_threadreq, req); - workqueue_lock_spin(wq); - goto again; - } else { - wq->wq_cached_threadreq = req; - } - - assert(wq->wq_cached_threadreq); -} - -static bool -_threadreq_copy_prepare_noblock(struct workqueue *wq) -{ - if (wq->wq_cached_threadreq) { - return true; - } - - wq->wq_cached_threadreq = zalloc_noblock(pthread_zone_threadreq); - - return wq->wq_cached_threadreq != NULL; -} - -static inline struct threadreq_head * -_threadreq_list_for_req(struct workqueue *wq, const struct threadreq *req) -{ - if (req->tr_flags & TR_FLAG_OVERCOMMIT) { - return &wq->wq_overcommit_reqlist[req->tr_priority]; - } else { - return &wq->wq_reqlist[req->tr_priority]; - } -} - -static void -_threadreq_enqueue(struct workqueue *wq, struct threadreq *req) -{ - assert(req && req->tr_state == TR_STATE_NEW); - if (req->tr_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { - assert(wq->wq_event_manager_threadreq.tr_state != TR_STATE_WAITING); - memcpy(&wq->wq_event_manager_threadreq, req, sizeof(struct threadreq)); - req = &wq->wq_event_manager_threadreq; - req->tr_flags &= ~(TR_FLAG_ONSTACK | TR_FLAG_NO_PACING); - } else { - if (req->tr_flags & TR_FLAG_ONSTACK) { - assert(wq->wq_cached_threadreq); - struct threadreq *newreq = wq->wq_cached_threadreq; - wq->wq_cached_threadreq = NULL; - - memcpy(newreq, req, sizeof(struct threadreq)); - newreq->tr_flags &= ~(TR_FLAG_ONSTACK | TR_FLAG_NO_PACING); - req->tr_state = TR_STATE_DEAD; - req = newreq; - } - TAILQ_INSERT_TAIL(_threadreq_list_for_req(wq, req), req, tr_entry); - } - req->tr_state = TR_STATE_WAITING; - wq->wq_reqcount++; -} - -static void -_threadreq_dequeue(struct workqueue *wq, struct threadreq *req) -{ - if (req->tr_priority != WORKQUEUE_EVENT_MANAGER_BUCKET) { - struct threadreq_head *req_list = _threadreq_list_for_req(wq, req); -#if DEBUG - struct threadreq *cursor = NULL; - TAILQ_FOREACH(cursor, req_list, tr_entry) { - if (cursor == req) break; - } - assert(cursor == req); -#endif - TAILQ_REMOVE(req_list, req, tr_entry); - } - wq->wq_reqcount--; -} - -/* - * Mark a thread request as complete. At this point, it is treated as owned by - * the submitting subsystem and you should assume it could be freed. - * - * Called with the workqueue lock held. - */ -static int -_threadreq_complete_and_unlock(proc_t p, struct workqueue *wq, - struct threadreq *req, struct threadlist *tl) -{ - struct threadreq *req_tofree = NULL; - bool sync = (req->tr_state == TR_STATE_NEW); - bool workloop = req->tr_flags & TR_FLAG_WORKLOOP; - bool onstack = req->tr_flags & TR_FLAG_ONSTACK; - bool kevent = req->tr_flags & TR_FLAG_KEVENT; - bool unbinding = tl->th_flags & TH_LIST_UNBINDING; - bool locked = true; - bool waking_parked_thread = (tl->th_flags & TH_LIST_BUSY); - int ret; - - req->tr_state = TR_STATE_COMPLETE; - - if (!workloop && !onstack && req != &wq->wq_event_manager_threadreq) { - if (wq->wq_cached_threadreq) { - req_tofree = req; - } else { - wq->wq_cached_threadreq = req; - } - } - - if (tl->th_flags & TH_LIST_UNBINDING) { - tl->th_flags &= ~TH_LIST_UNBINDING; - assert((tl->th_flags & TH_LIST_KEVENT_BOUND)); - } else if (workloop || kevent) { - assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0); - tl->th_flags |= TH_LIST_KEVENT_BOUND; - } - - if (workloop) { - workqueue_unlock(wq); - ret = pthread_kern->workloop_fulfill_threadreq(wq->wq_proc, (void*)req, - tl->th_thread, sync ? 
WORKLOOP_FULFILL_THREADREQ_SYNC : 0); - assert(ret == 0); - locked = false; - } else if (kevent) { - unsigned int kevent_flags = KEVENT_FLAG_WORKQ; - if (sync) { - kevent_flags |= KEVENT_FLAG_SYNCHRONOUS_BIND; - } - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { - kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER; - } - workqueue_unlock(wq); - ret = kevent_qos_internal_bind(wq->wq_proc, - class_index_get_thread_qos(tl->th_priority), tl->th_thread, - kevent_flags); - if (ret != 0) { - workqueue_lock_spin(wq); - tl->th_flags &= ~TH_LIST_KEVENT_BOUND; - locked = true; - } else { - locked = false; - } - } - - /* - * Run Thread, Run! - */ - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 0, 0, 0, 0); - PTHREAD_TRACE_WQ_REQ(TRACE_wq_runitem | DBG_FUNC_START, wq, req, tl->th_priority, - thread_tid(current_thread()), thread_tid(tl->th_thread)); - - if (waking_parked_thread) { - if (!locked) { - workqueue_lock_spin(wq); - } - tl->th_flags &= ~(TH_LIST_BUSY); - if ((tl->th_flags & TH_LIST_REMOVING_VOUCHER) == 0) { - /* - * If the thread is in the process of removing its voucher, then it - * isn't actually in the wait event yet and we don't need to wake - * it up. Save the trouble (and potential lock-ordering issues - * (see 30617015)). - */ - thread_wakeup_thread(tl, tl->th_thread); - } - workqueue_unlock(wq); - - if (req_tofree) zfree(pthread_zone_threadreq, req_tofree); - return WQ_RUN_TR_THREAD_STARTED; - } - - assert ((tl->th_flags & TH_LIST_PACING) == 0); - if (locked) { - workqueue_unlock(wq); - } - if (req_tofree) zfree(pthread_zone_threadreq, req_tofree); - if (unbinding) { - return WQ_RUN_TR_THREAD_STARTED; - } - _setup_wqthread(p, tl->th_thread, wq, tl, WQ_SETUP_CLEAR_VOUCHER); - pthread_kern->unix_syscall_return(EJUSTRETURN); - __builtin_unreachable(); -} - -/* - * Mark a thread request as cancelled. Has similar ownership semantics to the - * complete call above. - */ -static void -_threadreq_cancel(struct workqueue *wq, struct threadreq *req) -{ - assert(req->tr_state == TR_STATE_WAITING); - req->tr_state = TR_STATE_DEAD; - - assert((req->tr_flags & TR_FLAG_ONSTACK) == 0); - if (req->tr_flags & TR_FLAG_WORKLOOP) { - __assert_only int ret; - ret = pthread_kern->workloop_fulfill_threadreq(wq->wq_proc, (void*)req, - THREAD_NULL, WORKLOOP_FULFILL_THREADREQ_CANCEL); - assert(ret == 0 || ret == ECANCELED); - } else if (req != &wq->wq_event_manager_threadreq) { - zfree(pthread_zone_threadreq, req); - } -} - -#pragma mark workqueue lock - -static boolean_t workqueue_lock_spin_is_acquired_kdp(struct workqueue *wq) { - return kdp_lck_spin_is_acquired(&wq->wq_lock); -} - -static void -workqueue_lock_spin(struct workqueue *wq) -{ - assert(ml_get_interrupts_enabled() == TRUE); - lck_spin_lock(&wq->wq_lock); -} - -static bool -workqueue_lock_try(struct workqueue *wq) -{ - return lck_spin_try_lock(&wq->wq_lock); -} - -static void -workqueue_unlock(struct workqueue *wq) -{ - lck_spin_unlock(&wq->wq_lock); -} - -#pragma mark workqueue add timer - -/** - * Sets up the timer which will call out to workqueue_add_timer - */ -static void -workqueue_interval_timer_start(struct workqueue *wq) -{ - uint64_t deadline; - - /* n.b. wq_timer_interval is reset to 0 in workqueue_add_timer if the - ATIMER_RUNNING flag is not present. The net effect here is that if a - sequence of threads is required, we'll double the time before we give out - the next one. 
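
A minimal sketch of the backoff this produces, using stand-in values for the wq_stalled_window_usecs and wq_max_timer_interval_usecs tunables rather than their real defaults:

#include <stdint.h>
#include <stdio.h>

/* Delayed-timer backoff: start at the stall window, double on every
 * consecutive arming, clamp at the maximum. */
#define STALLED_WINDOW_USECS     200u
#define MAX_TIMER_INTERVAL_USECS 2000u

static uint32_t next_interval(uint32_t cur)
{
	if (cur == 0)
		return STALLED_WINDOW_USECS;
	cur *= 2;
	return cur > MAX_TIMER_INTERVAL_USECS ? MAX_TIMER_INTERVAL_USECS : cur;
}

int main(void)
{
	uint32_t iv = 0;
	for (int i = 0; i < 6; i++) {
		iv = next_interval(iv);
		printf("arm %d: %u us\n", i, iv);  /* 200, 400, 800, 1600, 2000, 2000 */
	}
	return 0;
}
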
*/ - if (wq->wq_timer_interval == 0) { - wq->wq_timer_interval = wq_stalled_window_usecs; - - } else { - wq->wq_timer_interval = wq->wq_timer_interval * 2; - - if (wq->wq_timer_interval > wq_max_timer_interval_usecs) { - wq->wq_timer_interval = wq_max_timer_interval_usecs; - } - } - clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline); - - PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, - _wq_flags(wq), wq->wq_timer_interval, 0); - - thread_call_t call = wq->wq_atimer_delayed_call; - if (thread_call_enter1_delayed(call, call, deadline)) { - panic("delayed_call was already enqueued"); - } -} - -/** - * Immediately trigger the workqueue_add_timer - */ -static void -workqueue_interval_timer_trigger(struct workqueue *wq) -{ - PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, - _wq_flags(wq), 0, 0); - - thread_call_t call = wq->wq_atimer_immediate_call; - if (thread_call_enter1(call, call)) { - panic("immediate_call was already enqueued"); - } -} - -/** - * returns whether lastblocked_tsp is within wq_stalled_window_usecs of cur_ts - */ -static boolean_t -wq_thread_is_busy(uint64_t cur_ts, _Atomic uint64_t *lastblocked_tsp) -{ - clock_sec_t secs; - clock_usec_t usecs; - uint64_t lastblocked_ts; - uint64_t elapsed; - - lastblocked_ts = atomic_load_explicit(lastblocked_tsp, memory_order_relaxed); - if (lastblocked_ts >= cur_ts) { - /* - * because the update of the timestamp when a thread blocks isn't - * serialized against us looking at it (i.e. we don't hold the workq lock) - * it's possible to have a timestamp that matches the current time or - * that even looks to be in the future relative to when we grabbed the current - * time... just treat this as a busy thread since it must have just blocked. - */ - return (TRUE); - } - elapsed = cur_ts - lastblocked_ts; - - pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs); - - return (secs == 0 && usecs < wq_stalled_window_usecs); -} - -/** - * handler function for the timer - */ -static void -workqueue_add_timer(struct workqueue *wq, thread_call_t thread_call_self) -{ - proc_t p = wq->wq_proc; - - workqueue_lock_spin(wq); - - PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_START, wq, - _wq_flags(wq), wq->wq_nthreads, wq->wq_thidlecount, 0); - - /* - * There's two tricky issues here. - * - * First issue: we start the thread_call's that invoke this routine without - * the workqueue lock held. The scheduler callback needs to trigger - * reevaluation of the number of running threads but shouldn't take that - * lock, so we can't use it to synchronize state around the thread_call. - * As a result, it might re-enter the thread_call while this routine is - * already running. This could cause it to fire a second time and we'll - * have two add_timers running at once. Obviously, we don't want that to - * keep stacking, so we need to keep it at two timers. - * - * Solution: use wq_flags (accessed via atomic CAS) to synchronize the - * enqueue of the thread_call itself. When a thread needs to trigger the - * add_timer, it checks for ATIMER_DELAYED_RUNNING and, when not set, sets - * the flag then does a thread_call_enter. We'll then remove that flag - * only once we've got the lock and it's safe for the thread_call to be - * entered again. - * - * Second issue: we need to make sure that the two timers don't execute this - * routine concurrently. We can't use the workqueue lock for this because - * we'll need to drop it during our execution. 
- * - * Solution: use WQL_ATIMER_BUSY as a condition variable to indicate that - * we are currently executing the routine and the next thread should wait. - * - * After all that, we arrive at the following four possible states: - * !WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY no pending timer, no active timer - * !WQ_ATIMER_DELAYED_RUNNING && WQL_ATIMER_BUSY no pending timer, 1 active timer - * WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY 1 pending timer, no active timer - * WQ_ATIMER_DELAYED_RUNNING && WQL_ATIMER_BUSY 1 pending timer, 1 active timer - * - * Further complication sometimes we need to trigger this function to run - * without delay. Because we aren't under a lock between setting - * WQ_ATIMER_DELAYED_RUNNING and calling thread_call_enter, we can't simply - * re-enter the thread call: if thread_call_enter() returned false, we - * wouldn't be able to distinguish the case where the thread_call had - * already fired from the case where it hadn't been entered yet from the - * other thread. So, we use a separate thread_call for immediate - * invocations, and a separate RUNNING flag, WQ_ATIMER_IMMEDIATE_RUNNING. - */ - - while (wq->wq_lflags & WQL_ATIMER_BUSY) { - wq->wq_lflags |= WQL_ATIMER_WAITING; - - assert_wait((caddr_t)wq, (THREAD_UNINT)); - workqueue_unlock(wq); - - thread_block(THREAD_CONTINUE_NULL); - - workqueue_lock_spin(wq); - } - /* - * Prevent _workqueue_mark_exiting() from going away - */ - wq->wq_lflags |= WQL_ATIMER_BUSY; - - /* - * Decide which timer we are and remove the RUNNING flag. - */ - if (thread_call_self == wq->wq_atimer_delayed_call) { - uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING); - if ((wq_flags & WQ_ATIMER_DELAYED_RUNNING) == 0) { - panic("workqueue_add_timer(delayed) w/o WQ_ATIMER_DELAYED_RUNNING"); - } - } else if (thread_call_self == wq->wq_atimer_immediate_call) { - uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING); - if ((wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) == 0) { - panic("workqueue_add_timer(immediate) w/o WQ_ATIMER_IMMEDIATE_RUNNING"); - } - } else { - panic("workqueue_add_timer can't figure out which timer it is"); - } - - int ret = WQ_RUN_TR_THREAD_STARTED; - while (ret == WQ_RUN_TR_THREAD_STARTED && wq->wq_reqcount) { - ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true); - - workqueue_lock_spin(wq); - } - _threadreq_copy_prepare(wq); - - /* - * If we called WQ_TIMER_NEEDED above, then this flag will be set if that - * call marked the timer running. If so, we let the timer interval grow. - * Otherwise, we reset it back to 0. 
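
The arm-at-most-once protocol described in the comment above can be modeled with C11 atomics in place of OSCompareAndSwap; a small sketch, with illustrative flag values rather than the kernel's wq_flags constants:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define WQ_EXITING                0x1u
#define WQ_ATIMER_DELAYED_RUNNING 0x2u

/* Returns true only for the caller that actually transitions the flag
 * from clear to set; that caller then enters the thread call.  Everyone
 * else either sees the timer already armed or the workqueue exiting. */
static bool timer_delayed_needed(_Atomic uint32_t *wq_flags)
{
	uint32_t old = atomic_load_explicit(wq_flags, memory_order_relaxed);
	do {
		if (old & (WQ_EXITING | WQ_ATIMER_DELAYED_RUNNING))
			return false;
	} while (!atomic_compare_exchange_weak(wq_flags, &old,
	    old | WQ_ATIMER_DELAYED_RUNNING));
	return true;
}

int main(void)
{
	_Atomic uint32_t wq_flags = 0;
	/* first caller arms the timer, the second sees it already armed */
	return timer_delayed_needed(&wq_flags) &&
	    !timer_delayed_needed(&wq_flags) ? 0 : 1;
}
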
- */ - uint32_t wq_flags = _wq_flags(wq); - if (!(wq_flags & WQ_ATIMER_DELAYED_RUNNING)) { - wq->wq_timer_interval = 0; - } - - wq->wq_lflags &= ~WQL_ATIMER_BUSY; - - if ((wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) { - /* - * wakeup the thread hung up in _workqueue_mark_exiting or - * workqueue_add_timer waiting for this timer to finish getting out of - * the way - */ - wq->wq_lflags &= ~WQL_ATIMER_WAITING; - wakeup(wq); - } - - PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_END, wq, 0, wq->wq_nthreads, wq->wq_thidlecount, 0); - - workqueue_unlock(wq); -} - -#pragma mark thread state tracking - -// called by spinlock code when trying to yield to lock owner -void -_workqueue_thread_yielded(void) -{ -} - -static void -workqueue_callback(int type, thread_t thread) -{ - struct uthread *uth = pthread_kern->get_bsdthread_info(thread); - struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); - struct workqueue *wq = tl->th_workq; - uint32_t old_count, req_qos, qos = tl->th_priority; - wq_thactive_t old_thactive; - - switch (type) { - case SCHED_CALL_BLOCK: { - bool start_timer = false; - - old_thactive = _wq_thactive_dec(wq, tl->th_priority); - req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive); - old_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive, - qos, NULL, NULL); - - if (old_count == wq_max_concurrency[tl->th_priority]) { - /* - * The number of active threads at this priority has fallen below - * the maximum number of concurrent threads that are allowed to run - * - * if we collide with another thread trying to update the - * last_blocked (really unlikely since another thread would have to - * get scheduled and then block after we start down this path), it's - * not a problem. Either timestamp is adequate, so no need to retry - */ - atomic_store_explicit(&wq->wq_lastblocked_ts[qos], - mach_absolute_time(), memory_order_relaxed); - } - - if (req_qos == WORKQUEUE_EVENT_MANAGER_BUCKET || qos > req_qos) { - /* - * The blocking thread is at a lower QoS than the highest currently - * pending constrained request, nothing has to be redriven - */ - } else { - uint32_t max_busycount, old_req_count; - old_req_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive, - req_qos, NULL, &max_busycount); - /* - * If it is possible that may_start_constrained_thread had refused - * admission due to being over the max concurrency, we may need to - * spin up a new thread. - * - * We take into account the maximum number of busy threads - * that can affect may_start_constrained_thread as looking at the - * actual number may_start_constrained_thread will see is racy. - * - * IOW at NCPU = 4, for IN (req_qos = 1), if the old req count is - * between NCPU (4) and NCPU - 2 (2) we need to redrive. - */ - if (wq_max_concurrency[req_qos] <= old_req_count + max_busycount && - old_req_count <= wq_max_concurrency[req_qos]) { - if (WQ_TIMER_DELAYED_NEEDED(wq)) { - start_timer = true; - workqueue_interval_timer_start(wq); - } - } - } - - PTHREAD_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_START, wq, - old_count - 1, qos | (req_qos << 8), - wq->wq_reqcount << 1 | start_timer, 0); - break; - } - case SCHED_CALL_UNBLOCK: { - /* - * we cannot take the workqueue_lock here... - * an UNBLOCK can occur from a timer event which - * is run from an interrupt context... if the workqueue_lock - * is already held by this processor, we'll deadlock... 
- * the thread lock for the thread being UNBLOCKED - * is also held - */ - old_thactive = _wq_thactive_inc(wq, qos); - if (pthread_debug_tracing) { - req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive); - old_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive, - qos, NULL, NULL); - PTHREAD_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_END, wq, - old_count + 1, qos | (req_qos << 8), - wq->wq_threads_scheduled, 0); - } - break; - } - } -} - -sched_call_t -_workqueue_get_sched_callback(void) -{ - return workqueue_callback; -} - -#pragma mark thread addition/removal - -static mach_vm_size_t -_workqueue_allocsize(struct workqueue *wq) -{ - proc_t p = wq->wq_proc; - mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map); - mach_vm_size_t pthread_size = - vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map)); - return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size; -} - -/** - * pop goes the thread - * - * If fromexit is set, the call is from workqueue_exit(, - * so some cleanups are to be avoided. - */ -static void -workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use) -{ - struct uthread * uth; - struct workqueue * wq = tl->th_workq; - - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){ - TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry); - } else { - TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry); - } - - if (fromexit == 0) { - assert(wq->wq_nthreads && wq->wq_thidlecount); - wq->wq_nthreads--; - wq->wq_thidlecount--; - } - - /* - * Clear the threadlist pointer in uthread so - * blocked thread on wakeup for termination will - * not access the thread list as it is going to be - * freed. - */ - pthread_kern->thread_sched_call(tl->th_thread, NULL); - - uth = pthread_kern->get_bsdthread_info(tl->th_thread); - if (uth != (struct uthread *)0) { - pthread_kern->uthread_set_threadlist(uth, NULL); - } - if (fromexit == 0) { - /* during exit the lock is not held */ - workqueue_unlock(wq); - } - - if ( (tl->th_flags & TH_LIST_NEW) || first_use ) { - /* - * thread was created, but never used... - * need to clean up the stack and port ourselves - * since we're not going to spin up through the - * normal exit path triggered from Libc - */ - if (fromexit == 0) { - /* vm map is already deallocated when this is called from exit */ - (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, _workqueue_allocsize(wq)); - } - (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport); - } - /* - * drop our ref on the thread - */ - thread_deallocate(tl->th_thread); - - zfree(pthread_zone_threadlist, tl); -} - - -/** - * Try to add a new workqueue thread. 
- * - * - called with workq lock held - * - dropped and retaken around thread creation - * - return with workq lock held - */ -static bool -workqueue_addnewthread(proc_t p, struct workqueue *wq) -{ - kern_return_t kret; - - wq->wq_nthreads++; - - workqueue_unlock(wq); - - struct threadlist *tl = zalloc(pthread_zone_threadlist); - bzero(tl, sizeof(struct threadlist)); - - thread_t th; - kret = pthread_kern->thread_create_workq_waiting(wq->wq_task, wq_unpark_continue, tl, &th); - if (kret != KERN_SUCCESS) { - PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 0, 0, 0); - goto fail_free; - } - - mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p); - - mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map); - mach_vm_size_t pthread_size = - vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map)); - mach_vm_size_t th_allocsize = guardsize + PTH_DEFAULT_STACKSIZE + pthread_size; - - kret = mach_vm_map(wq->wq_map, &stackaddr, - th_allocsize, page_size-1, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL, - 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, - VM_INHERIT_DEFAULT); - - if (kret != KERN_SUCCESS) { - kret = mach_vm_allocate(wq->wq_map, - &stackaddr, th_allocsize, - VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE); - } - - if (kret != KERN_SUCCESS) { - PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 1, 0, 0); - goto fail_terminate; - } - - /* - * The guard page is at the lowest address - * The stack base is the highest address - */ - kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE); - if (kret != KERN_SUCCESS) { - PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 2, 0, 0); - goto fail_vm_deallocate; - } - - - pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD | THREAD_TAG_WORKQUEUE); - pthread_kern->thread_static_param(th, TRUE); - - /* - * convert_thread_to_port() consumes a reference - */ - thread_reference(th); - void *sright = (void *)pthread_kern->convert_thread_to_port(th); - tl->th_thport = pthread_kern->ipc_port_copyout_send(sright, - pthread_kern->task_get_ipcspace(wq->wq_task)); - - tl->th_flags = TH_LIST_INITED | TH_LIST_NEW; - tl->th_thread = th; - tl->th_workq = wq; - tl->th_stackaddr = stackaddr; - tl->th_priority = WORKQUEUE_NUM_BUCKETS; - - struct uthread *uth; - uth = pthread_kern->get_bsdthread_info(tl->th_thread); - - workqueue_lock_spin(wq); - - void *current_tl = pthread_kern->uthread_get_threadlist(uth); - if (current_tl == NULL) { - pthread_kern->uthread_set_threadlist(uth, tl); - TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry); - wq->wq_thidlecount++; - } else if (current_tl == WQ_THREADLIST_EXITING_POISON) { - /* - * Failed thread creation race: The thread already woke up and has exited. 
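
The allocation carved out above is laid out as guard page, stack, then pthread_t area, from low to high addresses. A small sketch of that arithmetic with stand-in sizes; the kernel derives the real ones from the target vm_map page size and the process's pthread size:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GUARD_SIZE        0x4000ULL    /* one VM page, protected VM_PROT_NONE */
#define DEFAULT_STACKSIZE 0x80000ULL   /* stand-in for PTH_DEFAULT_STACKSIZE  */
#define PTHREAD_AREA_SIZE 0x4000ULL    /* rounded pthread_t + TSD             */

int main(void)
{
	uint64_t base       = 0x170000000ULL;   /* hypothetical mach_vm_map result */
	uint64_t alloc_size = GUARD_SIZE + DEFAULT_STACKSIZE + PTHREAD_AREA_SIZE;

	uint64_t guard_lo   = base;                          /* lowest address         */
	uint64_t stack_lo   = base + GUARD_SIZE;             /* lowest usable byte     */
	uint64_t stack_hi   = stack_lo + DEFAULT_STACKSIZE;  /* stack grows down from here */
	uint64_t pthread_lo = stack_hi;                      /* pthread_t sits on top  */

	assert(pthread_lo + PTHREAD_AREA_SIZE == base + alloc_size);
	printf("guard %#llx  stack [%#llx, %#llx)  pthread %#llx\n",
	    (unsigned long long)guard_lo, (unsigned long long)stack_lo,
	    (unsigned long long)stack_hi, (unsigned long long)pthread_lo);
	return 0;
}
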
- */ - PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 3, 0, 0); - goto fail_unlock; - } else { - panic("Unexpected initial threadlist value"); - } - - PTHREAD_TRACE_WQ(TRACE_wq_thread_create | DBG_FUNC_NONE, wq, 0, 0, 0, 0); - - return (TRUE); - -fail_unlock: - workqueue_unlock(wq); - (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), - tl->th_thport); - -fail_vm_deallocate: - (void) mach_vm_deallocate(wq->wq_map, stackaddr, th_allocsize); - -fail_terminate: - if (pthread_kern->thread_will_park_or_terminate) { - pthread_kern->thread_will_park_or_terminate(th); - } - (void)thread_terminate(th); - thread_deallocate(th); - -fail_free: - zfree(pthread_zone_threadlist, tl); - - workqueue_lock_spin(wq); - wq->wq_nthreads--; - - return (FALSE); -} - -/** - * Setup per-process state for the workqueue. - */ -int -_workq_open(struct proc *p, __unused int32_t *retval) -{ - struct workqueue * wq; - char * ptr; - uint32_t num_cpus; - int error = 0; - - if (pthread_kern->proc_get_register(p) == 0) { - return EINVAL; - } - - num_cpus = pthread_kern->ml_get_max_cpus(); - - if (wq_init_constrained_limit) { - uint32_t limit; - /* - * set up the limit for the constrained pool - * this is a virtual pool in that we don't - * maintain it on a separate idle and run list - */ - limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR; - - if (limit > wq_max_constrained_threads) - wq_max_constrained_threads = limit; - - wq_init_constrained_limit = 0; - - if (wq_max_threads > WQ_THACTIVE_BUCKET_HALF) { - wq_max_threads = WQ_THACTIVE_BUCKET_HALF; - } - if (wq_max_threads > pthread_kern->config_thread_max - 20) { - wq_max_threads = pthread_kern->config_thread_max - 20; - } - } - - if (pthread_kern->proc_get_wqptr(p) == NULL) { - if (pthread_kern->proc_init_wqptr_or_wait(p) == FALSE) { - assert(pthread_kern->proc_get_wqptr(p) != NULL); - goto out; - } - - ptr = (char *)zalloc(pthread_zone_workqueue); - bzero(ptr, sizeof(struct workqueue)); - - wq = (struct workqueue *)ptr; - wq->wq_proc = p; - wq->wq_task = current_task(); - wq->wq_map = pthread_kern->current_map(); - - // Start the event manager at the priority hinted at by the policy engine - int mgr_priority_hint = pthread_kern->task_get_default_manager_qos(current_task()); - wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(mgr_priority_hint) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - - TAILQ_INIT(&wq->wq_thrunlist); - TAILQ_INIT(&wq->wq_thidlelist); - for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) { - TAILQ_INIT(&wq->wq_overcommit_reqlist[i]); - TAILQ_INIT(&wq->wq_reqlist[i]); - } - - wq->wq_atimer_delayed_call = - thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer, - (thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL); - wq->wq_atimer_immediate_call = - thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer, - (thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL); - - lck_spin_init(&wq->wq_lock, pthread_lck_grp, pthread_lck_attr); - - wq->wq_cached_threadreq = zalloc(pthread_zone_threadreq); - *(wq_thactive_t *)&wq->wq_thactive = - (wq_thactive_t)WQ_THACTIVE_NO_PENDING_REQUEST << - WQ_THACTIVE_QOS_SHIFT; - - pthread_kern->proc_set_wqptr(p, wq); - - } -out: - - return(error); -} - -/* - * Routine: workqueue_mark_exiting - * - * Function: Mark the work queue such that new threads will not be added to the - * work queue after we return. - * - * Conditions: Called against the current process. 
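
Stepping back to the pool sizing in _workq_open above, a worked example of the constrained-pool cap and the clamping of wq_max_threads; the factor and limits used here are assumptions for illustration, not the kernel's tunables:

#include <stdint.h>
#include <stdio.h>

#define CONSTRAINED_FACTOR   5u      /* threads per CPU for the constrained pool */
#define THACTIVE_BUCKET_HALF 127u    /* what fits in a wq_thactive bucket field  */
#define CONFIG_THREAD_MAX    2560u   /* system-wide thread budget                */

int main(void)
{
	uint32_t num_cpus = 8;
	uint32_t max_constrained = num_cpus * CONSTRAINED_FACTOR;   /* 40 */

	uint32_t max_threads = 512;                    /* hypothetical sysctl value */
	if (max_threads > THACTIVE_BUCKET_HALF)
		max_threads = THACTIVE_BUCKET_HALF;        /* must fit the bucket field */
	if (max_threads > CONFIG_THREAD_MAX - 20)
		max_threads = CONFIG_THREAD_MAX - 20;      /* leave headroom for others */

	printf("constrained cap %u, thread cap %u\n", max_constrained, max_threads);
	return 0;
}
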
- */ -void -_workqueue_mark_exiting(struct proc *p) -{ - struct workqueue *wq = pthread_kern->proc_get_wqptr(p); - if (!wq) return; - - PTHREAD_TRACE_WQ(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0); - - workqueue_lock_spin(wq); - - /* - * We arm the add timer without holding the workqueue lock so we need - * to synchronize with any running or soon to be running timers. - * - * Threads that intend to arm the timer atomically OR - * WQ_ATIMER_{DELAYED,IMMEDIATE}_RUNNING into the wq_flags, only if - * WQ_EXITING is not present. So, once we have set WQ_EXITING, we can - * be sure that no new RUNNING flags will be set, but still need to - * wait for the already running timers to complete. - * - * We always hold the workq lock when dropping WQ_ATIMER_RUNNING, so - * the check for and sleep until clear is protected. - */ - uint64_t wq_flags = _wq_flags_or_orig(wq, WQ_EXITING); - - if (wq_flags & WQ_ATIMER_DELAYED_RUNNING) { - if (thread_call_cancel(wq->wq_atimer_delayed_call) == TRUE) { - wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING); - } - } - if (wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) { - if (thread_call_cancel(wq->wq_atimer_immediate_call) == TRUE) { - wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING); - } - } - while ((_wq_flags(wq) & (WQ_ATIMER_DELAYED_RUNNING | WQ_ATIMER_IMMEDIATE_RUNNING)) || - (wq->wq_lflags & WQL_ATIMER_BUSY)) { - assert_wait((caddr_t)wq, (THREAD_UNINT)); - workqueue_unlock(wq); - - thread_block(THREAD_CONTINUE_NULL); - - workqueue_lock_spin(wq); - } - - /* - * Save off pending requests, will complete/free them below after unlocking - */ - TAILQ_HEAD(, threadreq) local_list = TAILQ_HEAD_INITIALIZER(local_list); - - for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) { - TAILQ_CONCAT(&local_list, &wq->wq_overcommit_reqlist[i], tr_entry); - TAILQ_CONCAT(&local_list, &wq->wq_reqlist[i], tr_entry); - } - - /* - * XXX: Can't deferred cancel the event manager request, so just smash it. - */ - assert((wq->wq_event_manager_threadreq.tr_flags & TR_FLAG_WORKLOOP) == 0); - wq->wq_event_manager_threadreq.tr_state = TR_STATE_DEAD; - - workqueue_unlock(wq); - - struct threadreq *tr, *tr_temp; - TAILQ_FOREACH_SAFE(tr, &local_list, tr_entry, tr_temp) { - _threadreq_cancel(wq, tr); - } - PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0); -} - -/* - * Routine: workqueue_exit - * - * Function: clean up the work queue structure(s) now that there are no threads - * left running inside the work queue (except possibly current_thread). - * - * Conditions: Called by the last thread in the process. - * Called against current process. - */ -void -_workqueue_exit(struct proc *p) -{ - struct workqueue * wq; - struct threadlist * tl, *tlist; - struct uthread *uth; - - wq = pthread_kern->proc_get_wqptr(p); - if (wq != NULL) { - - PTHREAD_TRACE_WQ(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0); - - pthread_kern->proc_set_wqptr(p, NULL); - - /* - * Clean up workqueue data structures for threads that exited and - * didn't get a chance to clean up after themselves. 
- */ - TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) { - assert((tl->th_flags & TH_LIST_RUNNING) != 0); - - pthread_kern->thread_sched_call(tl->th_thread, NULL); - - uth = pthread_kern->get_bsdthread_info(tl->th_thread); - if (uth != (struct uthread *)0) { - pthread_kern->uthread_set_threadlist(uth, NULL); - } - TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry); - - /* - * drop our last ref on the thread - */ - thread_deallocate(tl->th_thread); - - zfree(pthread_zone_threadlist, tl); - } - TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) { - assert((tl->th_flags & TH_LIST_RUNNING) == 0); - assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET); - workqueue_removethread(tl, true, false); - } - TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlemgrlist, th_entry, tlist) { - assert((tl->th_flags & TH_LIST_RUNNING) == 0); - assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET); - workqueue_removethread(tl, true, false); - } - if (wq->wq_cached_threadreq) { - zfree(pthread_zone_threadreq, wq->wq_cached_threadreq); - } - thread_call_free(wq->wq_atimer_delayed_call); - thread_call_free(wq->wq_atimer_immediate_call); - lck_spin_destroy(&wq->wq_lock, pthread_lck_grp); - - for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) { - assert(TAILQ_EMPTY(&wq->wq_overcommit_reqlist[i])); - assert(TAILQ_EMPTY(&wq->wq_reqlist[i])); - } - - zfree(pthread_zone_workqueue, wq); - - PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0); - } -} - - -#pragma mark workqueue thread manipulation - - -/** - * Entry point for libdispatch to ask for threads - */ -static int -wqops_queue_reqthreads(struct proc *p, int reqcount, - pthread_priority_t priority) -{ - bool overcommit = _pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; - bool event_manager = _pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - int class = event_manager ? WORKQUEUE_EVENT_MANAGER_BUCKET : - pthread_priority_get_class_index(priority); - - if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS) || - (overcommit && event_manager)) { - return EINVAL; - } - - struct workqueue *wq; - if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) { - return EINVAL; - } - - workqueue_lock_spin(wq); - _threadreq_copy_prepare(wq); - - PTHREAD_TRACE_WQ(TRACE_wq_wqops_reqthreads | DBG_FUNC_NONE, wq, reqcount, priority, 0, 0); - - int tr_flags = 0; - if (overcommit) tr_flags |= TR_FLAG_OVERCOMMIT; - if (reqcount > 1) { - /* - * when libdispatch asks for more than one thread, it wants to achieve - * parallelism. Pacing would be detrimental to this ask, so treat - * these specially to not do the pacing admission check - */ - tr_flags |= TR_FLAG_NO_PACING; - } - - while (reqcount-- && !_wq_exiting(wq)) { - struct threadreq req; - _threadreq_init_stack(&req, class, tr_flags); - - workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, true); - - workqueue_lock_spin(wq); /* reacquire */ - _threadreq_copy_prepare(wq); - } - - workqueue_unlock(wq); - - return 0; -} - -/* - * Used by the kevent system to request threads. - * - * Currently count is ignored and we always return one thread per invocation. 
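
A tiny sketch of the request-flag rule in wqops_queue_reqthreads above; the flag values are illustrative, only the rule itself is taken from the code:

#include <stdbool.h>

#define TR_FLAG_OVERCOMMIT 0x1
#define TR_FLAG_NO_PACING  0x2

/* Overcommit requests bypass the constrained pool; batched requests
 * (reqcount > 1) skip the pacing admission check so that a caller asking
 * for parallelism is not throttled. */
static int threadreq_flags(bool overcommit, int reqcount)
{
	int flags = 0;
	if (overcommit)
		flags |= TR_FLAG_OVERCOMMIT;
	if (reqcount > 1)
		flags |= TR_FLAG_NO_PACING;
	return flags;
}

int main(void)
{
	/* e.g. a caller asking for 4 non-overcommit workers: pacing is skipped */
	return threadreq_flags(false, 4) == TR_FLAG_NO_PACING ? 0 : 1;
}
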
- */ -static thread_t -_workq_kevent_reqthreads(struct proc *p, pthread_priority_t priority, - bool no_emergency) -{ - int wq_run_tr = WQ_RUN_TR_THROTTLED; - bool emergency_thread = false; - struct threadreq req; - - - struct workqueue *wq; - if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) { - return THREAD_NULL; - } - - int class = pthread_priority_get_class_index(priority); - - workqueue_lock_spin(wq); - bool has_threadreq = _threadreq_copy_prepare_noblock(wq); - - PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, NULL, priority, 0, 0); - - /* - * Skip straight to event manager if that's what was requested - */ - if ((_pthread_priority_get_qos_newest(priority) == QOS_CLASS_UNSPECIFIED) || - (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)){ - goto event_manager; - } - - bool will_pace = _wq_should_pace_priority(wq, class); - if ((wq->wq_thidlecount == 0 || will_pace) && has_threadreq == false) { - /* - * We'll need to persist the request and can't, so return the emergency - * thread instead, which has a persistent request object. - */ - emergency_thread = true; - goto event_manager; - } - - /* - * Handle overcommit requests - */ - if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){ - _threadreq_init_stack(&req, class, TR_FLAG_KEVENT | TR_FLAG_OVERCOMMIT); - wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false); - goto done; - } - - /* - * Handle constrained requests - */ - boolean_t may_start = may_start_constrained_thread(wq, class, NULL, false); - if (may_start || no_emergency) { - _threadreq_init_stack(&req, class, TR_FLAG_KEVENT); - wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false); - goto done; - } else { - emergency_thread = true; - } - - -event_manager: - _threadreq_init_stack(&req, WORKQUEUE_EVENT_MANAGER_BUCKET, TR_FLAG_KEVENT); - wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false); - -done: - if (wq_run_tr == WQ_RUN_TR_THREAD_NEEDED && WQ_TIMER_IMMEDIATE_NEEDED(wq)) { - workqueue_interval_timer_trigger(wq); - } - return emergency_thread ? 
(void*)-1 : 0; -} - -thread_t -_workq_reqthreads(struct proc *p, __assert_only int requests_count, - workq_reqthreads_req_t request) -{ - assert(requests_count == 1); - - pthread_priority_t priority = request->priority; - bool no_emergency = request->count & WORKQ_REQTHREADS_NOEMERGENCY; - - return _workq_kevent_reqthreads(p, priority, no_emergency); -} - - -int -workq_kern_threadreq(struct proc *p, workq_threadreq_t _req, - enum workq_threadreq_type type, unsigned long priority, int flags) -{ - struct workqueue *wq; - int ret; - - if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) { - return EINVAL; - } - - switch (type) { - case WORKQ_THREADREQ_KEVENT: { - bool no_emergency = flags & WORKQ_THREADREQ_FLAG_NOEMERGENCY; - (void)_workq_kevent_reqthreads(p, priority, no_emergency); - return 0; - } - case WORKQ_THREADREQ_WORKLOOP: - case WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL: { - struct threadreq *req = (struct threadreq *)_req; - int req_class = pthread_priority_get_class_index(priority); - int req_flags = TR_FLAG_WORKLOOP; - if ((_pthread_priority_get_flags(priority) & - _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){ - req_flags |= TR_FLAG_OVERCOMMIT; - } - - thread_t thread = current_thread(); - struct threadlist *tl = util_get_thread_threadlist_entry(thread); - - if (tl && tl != WQ_THREADLIST_EXITING_POISON && - (tl->th_flags & TH_LIST_UNBINDING)) { - /* - * we're called back synchronously from the context of - * kevent_qos_internal_unbind from within wqops_thread_return() - * we can try to match up this thread with this request ! - */ - } else { - tl = NULL; - } - - _threadreq_init_alloced(req, req_class, req_flags); - workqueue_lock_spin(wq); - PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, priority, 1, 0); - ret = workqueue_run_threadreq_and_unlock(p, wq, tl, req, false); - if (ret == WQ_RUN_TR_EXITING) { - return ECANCELED; - } - if (ret == WQ_RUN_TR_THREAD_NEEDED) { - if (type == WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL) { - return EAGAIN; - } - if (WQ_TIMER_IMMEDIATE_NEEDED(wq)) { - workqueue_interval_timer_trigger(wq); - } - } - return 0; - } - case WORKQ_THREADREQ_REDRIVE: - PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, 0, 0, 4, 0); - workqueue_lock_spin(wq); - ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true); - if (ret == WQ_RUN_TR_EXITING) { - return ECANCELED; - } - return 0; - default: - return ENOTSUP; - } -} - -int -workq_kern_threadreq_modify(struct proc *p, workq_threadreq_t _req, - enum workq_threadreq_op operation, unsigned long arg1, - unsigned long __unused arg2) -{ - struct threadreq *req = (struct threadreq *)_req; - struct workqueue *wq; - int priclass, ret = 0, wq_tr_rc = WQ_RUN_TR_THROTTLED; - - if (req == NULL || (wq = pthread_kern->proc_get_wqptr(p)) == NULL) { - return EINVAL; - } - - workqueue_lock_spin(wq); - - if (_wq_exiting(wq)) { - ret = ECANCELED; - goto out_unlock; - } - - /* - * Find/validate the referenced request structure - */ - if (req->tr_state != TR_STATE_WAITING) { - ret = EINVAL; - goto out_unlock; - } - assert(req->tr_priority < WORKQUEUE_EVENT_MANAGER_BUCKET); - assert(req->tr_flags & TR_FLAG_WORKLOOP); - - switch (operation) { - case WORKQ_THREADREQ_CHANGE_PRI: - case WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL: - priclass = pthread_priority_get_class_index(arg1); - PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, arg1, 2, 0); - if (req->tr_priority == priclass) { - goto out_unlock; - } - _threadreq_dequeue(wq, req); - req->tr_priority = 
priclass; - req->tr_state = TR_STATE_NEW; // what was old is new again - wq_tr_rc = workqueue_run_threadreq_and_unlock(p, wq, NULL, req, false); - goto out; - - case WORKQ_THREADREQ_CANCEL: - PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, 0, 3, 0); - _threadreq_dequeue(wq, req); - req->tr_state = TR_STATE_DEAD; - break; - - default: - ret = ENOTSUP; - break; - } - -out_unlock: - workqueue_unlock(wq); -out: - if (wq_tr_rc == WQ_RUN_TR_THREAD_NEEDED) { - if (operation == WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL) { - ret = EAGAIN; - } else if (WQ_TIMER_IMMEDIATE_NEEDED(wq)) { - workqueue_interval_timer_trigger(wq); - } - } - return ret; -} - - -static int -wqops_thread_return(struct proc *p, struct workqueue *wq) -{ - thread_t th = current_thread(); - struct uthread *uth = pthread_kern->get_bsdthread_info(th); - struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); - - /* reset signal mask on the workqueue thread to default state */ - if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) { - pthread_kern->proc_lock(p); - pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask); - pthread_kern->proc_unlock(p); - } - - if (wq == NULL || !tl) { - return EINVAL; - } - - PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_START, tl->th_workq, 0, 0, 0, 0); - - /* - * This squash call has neat semantics: it removes the specified overrides, - * replacing the current requested QoS with the previous effective QoS from - * those overrides. This means we won't be preempted due to having our QoS - * lowered. Of course, now our understanding of the thread's QoS is wrong, - * so we'll adjust below. - */ - bool was_manager = (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET); - int new_qos; - - if (!was_manager) { - new_qos = pthread_kern->proc_usynch_thread_qos_squash_override_for_resource(th, - THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD, - THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE); - } - - PTHREAD_TRACE_WQ(TRACE_wq_runitem | DBG_FUNC_END, wq, tl->th_priority, 0, 0, 0); - - workqueue_lock_spin(wq); - - if (tl->th_flags & TH_LIST_KEVENT_BOUND) { - unsigned int flags = KEVENT_FLAG_WORKQ; - if (was_manager) { - flags |= KEVENT_FLAG_WORKQ_MANAGER; - } - - tl->th_flags |= TH_LIST_UNBINDING; - workqueue_unlock(wq); - kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, flags); - if (!(tl->th_flags & TH_LIST_UNBINDING)) { - _setup_wqthread(p, th, wq, tl, WQ_SETUP_CLEAR_VOUCHER); - pthread_kern->unix_syscall_return(EJUSTRETURN); - __builtin_unreachable(); - } - workqueue_lock_spin(wq); - tl->th_flags &= ~(TH_LIST_KEVENT_BOUND | TH_LIST_UNBINDING); - } - - if (!was_manager) { - /* Fix up counters from the squash operation. 
*/ - uint8_t old_bucket = tl->th_priority; - uint8_t new_bucket = thread_qos_get_class_index(new_qos); - - if (old_bucket != new_bucket) { - _wq_thactive_move(wq, old_bucket, new_bucket); - wq->wq_thscheduled_count[old_bucket]--; - wq->wq_thscheduled_count[new_bucket]++; - - PTHREAD_TRACE_WQ(TRACE_wq_thread_squash | DBG_FUNC_NONE, wq, tl->th_priority, new_bucket, 0, 0); - tl->th_priority = new_bucket; - PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_END, tl->th_workq, new_qos, 0, 0, 0); - } - } - - workqueue_run_threadreq_and_unlock(p, wq, tl, NULL, false); - return 0; -} - -/** - * Multiplexed call to interact with the workqueue mechanism - */ -int -_workq_kernreturn(struct proc *p, - int options, - user_addr_t item, - int arg2, - int arg3, - int32_t *retval) -{ - struct workqueue *wq; - int error = 0; - - if (pthread_kern->proc_get_register(p) == 0) { - return EINVAL; - } - - switch (options) { - case WQOPS_QUEUE_NEWSPISUPP: { - /* - * arg2 = offset of serialno into dispatch queue - * arg3 = kevent support - */ - int offset = arg2; - if (arg3 & 0x01){ - // If we get here, then userspace has indicated support for kevent delivery. - } - - pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset); - break; - } - case WQOPS_QUEUE_REQTHREADS: { - /* - * arg2 = number of threads to start - * arg3 = priority - */ - error = wqops_queue_reqthreads(p, arg2, arg3); - break; - } - case WQOPS_SET_EVENT_MANAGER_PRIORITY: { - /* - * arg2 = priority for the manager thread - * - * if _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG is set, the - * ~_PTHREAD_PRIORITY_FLAGS_MASK contains a scheduling priority instead - * of a QOS value - */ - pthread_priority_t pri = arg2; - - wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p); - if (wq == NULL) { - error = EINVAL; - break; - } - workqueue_lock_spin(wq); - if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){ - /* - * If userspace passes a scheduling priority, that takes precidence - * over any QoS. (So, userspace should take care not to accidenatally - * lower the priority this way.) 
- */ - uint32_t sched_pri = pri & _PTHREAD_PRIORITY_SCHED_PRI_MASK; - if (wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){ - wq->wq_event_manager_priority = MAX(sched_pri, wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_MASK) - | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - } else { - wq->wq_event_manager_priority = sched_pri - | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - } - } else if ((wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){ - int cur_qos = pthread_priority_get_thread_qos(wq->wq_event_manager_priority); - int new_qos = pthread_priority_get_thread_qos(pri); - wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(MAX(cur_qos, new_qos)) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - } - workqueue_unlock(wq); - break; - } - case WQOPS_THREAD_KEVENT_RETURN: - case WQOPS_THREAD_WORKLOOP_RETURN: - wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p); - PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, options, 0, 0, 0); - if (item != 0 && arg2 != 0) { - int32_t kevent_retval; - int ret; - if (options == WQOPS_THREAD_KEVENT_RETURN) { - ret = kevent_qos_internal(p, -1, item, arg2, item, arg2, NULL, NULL, - KEVENT_FLAG_WORKQ | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS, - &kevent_retval); - } else /* options == WQOPS_THREAD_WORKLOOP_RETURN */ { - kqueue_id_t kevent_id = -1; - ret = kevent_id_internal(p, &kevent_id, item, arg2, item, arg2, - NULL, NULL, - KEVENT_FLAG_WORKLOOP | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS, - &kevent_retval); - } - /* - * We shouldn't be getting more errors out than events we put in, so - * reusing the input buffer should always provide enough space. But, - * the assert is commented out since we get errors in edge cases in the - * process lifecycle. - */ - //assert(ret == KERN_SUCCESS && kevent_retval >= 0); - if (ret != KERN_SUCCESS){ - error = ret; - break; - } else if (kevent_retval > 0){ - assert(kevent_retval <= arg2); - *retval = kevent_retval; - error = 0; - break; - } - } - goto thread_return; - - case WQOPS_THREAD_RETURN: - wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p); - PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, options, 0, 0, 0); - thread_return: - error = wqops_thread_return(p, wq); - // NOT REACHED except in case of error - assert(error); - break; - - case WQOPS_SHOULD_NARROW: { - /* - * arg2 = priority to test - * arg3 = unused - */ - pthread_priority_t priority = arg2; - thread_t th = current_thread(); - struct threadlist *tl = util_get_thread_threadlist_entry(th); - - if (tl == NULL || (tl->th_flags & TH_LIST_CONSTRAINED) == 0) { - error = EINVAL; - break; - } - - int class = pthread_priority_get_class_index(priority); - wq = tl->th_workq; - workqueue_lock_spin(wq); - bool should_narrow = !may_start_constrained_thread(wq, class, tl, false); - workqueue_unlock(wq); - - *retval = should_narrow; - break; - } - default: - error = EINVAL; - break; - } - - switch (options) { - case WQOPS_THREAD_KEVENT_RETURN: - case WQOPS_THREAD_WORKLOOP_RETURN: - case WQOPS_THREAD_RETURN: - PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, options, 0, 0, 0); - break; - } - return (error); -} - -/* - * We have no work to do, park ourselves on the idle list. - * - * Consumes the workqueue lock and does not return. 
- */ -static void __dead2 -parkit(struct workqueue *wq, struct threadlist *tl, thread_t thread) -{ - assert(thread == tl->th_thread); - assert(thread == current_thread()); - - PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_START, wq, 0, 0, 0, 0); - - uint32_t us_to_wait = 0; - - TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry); - - tl->th_flags &= ~TH_LIST_RUNNING; - tl->th_flags &= ~TH_LIST_KEVENT; - assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0); - - if (tl->th_flags & TH_LIST_CONSTRAINED) { - wq->wq_constrained_threads_scheduled--; - tl->th_flags &= ~TH_LIST_CONSTRAINED; - } - - _wq_thactive_dec(wq, tl->th_priority); - wq->wq_thscheduled_count[tl->th_priority]--; - wq->wq_threads_scheduled--; - uint32_t thidlecount = ++wq->wq_thidlecount; - - pthread_kern->thread_sched_call(thread, NULL); - - /* - * We'd like to always have one manager thread parked so that we can have - * low latency when we need to bring a manager thread up. If that idle - * thread list is empty, make this thread a manager thread. - * - * XXX: This doesn't check that there's not a manager thread outstanding, - * so it's based on the assumption that most manager callouts will change - * their QoS before parking. If that stops being true, this may end up - * costing us more than we gain. - */ - if (TAILQ_EMPTY(&wq->wq_thidlemgrlist) && - tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET){ - PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE, - wq, thread_tid(thread), - (tl->th_priority << 16) | WORKQUEUE_EVENT_MANAGER_BUCKET, 2, 0); - reset_priority(tl, pthread_priority_from_wq_class_index(wq, WORKQUEUE_EVENT_MANAGER_BUCKET)); - tl->th_priority = WORKQUEUE_EVENT_MANAGER_BUCKET; - } - - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){ - TAILQ_INSERT_HEAD(&wq->wq_thidlemgrlist, tl, th_entry); - } else { - TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry); - } - - /* - * When we remove the voucher from the thread, we may lose our importance - * causing us to get preempted, so we do this after putting the thread on - * the idle list. That when, when we get our importance back we'll be able - * to use this thread from e.g. the kevent call out to deliver a boosting - * message. - */ - tl->th_flags |= TH_LIST_REMOVING_VOUCHER; - workqueue_unlock(wq); - if (pthread_kern->thread_will_park_or_terminate) { - pthread_kern->thread_will_park_or_terminate(tl->th_thread); - } - __assert_only kern_return_t kr; - kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL); - assert(kr == KERN_SUCCESS); - workqueue_lock_spin(wq); - tl->th_flags &= ~(TH_LIST_REMOVING_VOUCHER); - - if ((tl->th_flags & TH_LIST_RUNNING) == 0) { - if (thidlecount < 101) { - us_to_wait = wq_reduce_pool_window_usecs - ((thidlecount-2) * (wq_reduce_pool_window_usecs / 100)); - } else { - us_to_wait = wq_reduce_pool_window_usecs / 100; - } - - thread_set_pending_block_hint(thread, kThreadWaitParkedWorkQueue); - assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE), - TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait, - wq_reduce_pool_window_usecs/10, NSEC_PER_USEC); - - workqueue_unlock(wq); - - thread_block(wq_unpark_continue); - panic("thread_block(wq_unpark_continue) returned!"); - } else { - workqueue_unlock(wq); - - /* - * While we'd dropped the lock to unset our voucher, someone came - * around and made us runnable. But because we weren't waiting on the - * event their wakeup() was ineffectual. To correct for that, we just - * run the continuation ourselves. 
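
A worked example of the parked-thread timeout computed above: each additional idle thread shaves one percent off the reduce-pool window, flooring at one percent once more than a hundred threads are idle. The five-second window is an assumed stand-in for wq_reduce_pool_window_usecs:

#include <stdint.h>
#include <stdio.h>

#define REDUCE_POOL_WINDOW_USECS 5000000u

static uint32_t park_timeout_usecs(uint32_t thidlecount)
{
	if (thidlecount < 101) {
		return REDUCE_POOL_WINDOW_USECS -
		    ((thidlecount - 2) * (REDUCE_POOL_WINDOW_USECS / 100));
	}
	return REDUCE_POOL_WINDOW_USECS / 100;
}

int main(void)
{
	printf("%u\n", park_timeout_usecs(2));    /* 5000000: first spare idler   */
	printf("%u\n", park_timeout_usecs(50));   /* 2600000: pool half full      */
	printf("%u\n", park_timeout_usecs(101));  /* 50000: deep pool, die sooner */
	return 0;
}
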
- */ - wq_unpark_continue(NULL, THREAD_AWAKENED); - } -} - -static bool -may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass, - struct threadlist *tl, bool may_start_timer) -{ - uint32_t req_qos = _wq_thactive_best_constrained_req_qos(wq); - wq_thactive_t thactive; - - if (may_start_timer && at_priclass < req_qos) { - /* - * When called from workqueue_run_threadreq_and_unlock() pre-post newest - * higher priorities into the thactive state so that - * workqueue_callback() takes the right decision. - * - * If the admission check passes, workqueue_run_threadreq_and_unlock - * will reset this value before running the request. - */ - thactive = _wq_thactive_set_best_constrained_req_qos(wq, req_qos, - at_priclass); -#ifdef __LP64__ - PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 1, (uint64_t)thactive, - (uint64_t)(thactive >> 64), 0, 0); -#endif - } else { - thactive = _wq_thactive(wq); - } - - uint32_t constrained_threads = wq->wq_constrained_threads_scheduled; - if (tl && (tl->th_flags & TH_LIST_CONSTRAINED)) { - /* - * don't count the current thread as scheduled - */ - constrained_threads--; - } - if (constrained_threads >= wq_max_constrained_threads) { - PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 1, - wq->wq_constrained_threads_scheduled, - wq_max_constrained_threads, 0); - /* - * we need 1 or more constrained threads to return to the kernel before - * we can dispatch additional work - */ - return false; - } - - /* - * Compute a metric for many how many threads are active. We find the - * highest priority request outstanding and then add up the number of - * active threads in that and all higher-priority buckets. We'll also add - * any "busy" threads which are not active but blocked recently enough that - * we can't be sure they've gone idle yet. We'll then compare this metric - * to our max concurrency to decide whether to add a new thread. - */ - - uint32_t busycount, thactive_count; - - thactive_count = _wq_thactive_aggregate_downto_qos(wq, thactive, - at_priclass, &busycount, NULL); - - if (tl && tl->th_priority <= at_priclass) { - /* - * don't count this thread as currently active - */ - assert(thactive_count > 0); - thactive_count--; - } - - if (thactive_count + busycount < wq_max_concurrency[at_priclass]) { - PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 2, - thactive_count, busycount, 0); - return true; - } else { - PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 3, - thactive_count, busycount, 0); - } - - if (busycount && may_start_timer) { - /* - * If this is called from the add timer, we won't have another timer - * fire when the thread exits the "busy" state, so rearm the timer. 
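
A worked example of the two admission checks above, assuming four logical CPUs (so a per-tier concurrency limit of 4) and a constrained-pool cap of 20:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* "active" counts threads at the request's QoS tier and above; "busy"
 * counts threads that blocked too recently to be trusted as idle. */
static bool may_start_constrained(uint32_t constrained_scheduled,
    uint32_t max_constrained, uint32_t active_at_or_above,
    uint32_t busycount, uint32_t max_concurrency)
{
	if (constrained_scheduled >= max_constrained)
		return false;                      /* pool-wide cap reached */
	return active_at_or_above + busycount < max_concurrency;
}

int main(void)
{
	assert(may_start_constrained(3, 20, 2, 1, 4));   /* 2 + 1 < 4: admit  */
	assert(!may_start_constrained(3, 20, 3, 1, 4));  /* 3 + 1 == 4: defer */
	assert(!may_start_constrained(20, 20, 0, 0, 4)); /* cap hit: defer    */
	return 0;
}
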
- */ - if (WQ_TIMER_DELAYED_NEEDED(wq)) { - workqueue_interval_timer_start(wq); - } - } - - return false; -} - -static struct threadlist * -pop_from_thidlelist(struct workqueue *wq, uint32_t priclass) -{ - assert(wq->wq_thidlecount); - - struct threadlist *tl = NULL; - - if (!TAILQ_EMPTY(&wq->wq_thidlemgrlist) && - (priclass == WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlelist))){ - tl = TAILQ_FIRST(&wq->wq_thidlemgrlist); - TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry); - assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET); - } else if (!TAILQ_EMPTY(&wq->wq_thidlelist) && - (priclass != WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlemgrlist))){ - tl = TAILQ_FIRST(&wq->wq_thidlelist); - TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry); - assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET); - } else { - panic("pop_from_thidlelist called with no threads available"); - } - assert((tl->th_flags & TH_LIST_RUNNING) == 0); - - assert(wq->wq_thidlecount); - wq->wq_thidlecount--; - - TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry); - - tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY; - - wq->wq_threads_scheduled++; - wq->wq_thscheduled_count[priclass]++; - _wq_thactive_inc(wq, priclass); - return tl; -} - -static pthread_priority_t -pthread_priority_from_wq_class_index(struct workqueue *wq, int index) -{ - if (index == WORKQUEUE_EVENT_MANAGER_BUCKET){ - return wq->wq_event_manager_priority; - } else { - return class_index_get_pthread_priority(index); - } -} - -static void -reset_priority(struct threadlist *tl, pthread_priority_t pri) -{ - kern_return_t ret; - thread_t th = tl->th_thread; - - if ((pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){ - ret = pthread_kern->thread_set_workq_qos(th, pthread_priority_get_thread_qos(pri), 0); - assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); - - if (tl->th_flags & TH_LIST_EVENT_MGR_SCHED_PRI) { - - /* Reset priority to default (masked by QoS) */ - - ret = pthread_kern->thread_set_workq_pri(th, 31, POLICY_TIMESHARE); - assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); - - tl->th_flags &= ~TH_LIST_EVENT_MGR_SCHED_PRI; - } - } else { - ret = pthread_kern->thread_set_workq_qos(th, THREAD_QOS_UNSPECIFIED, 0); - assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); - ret = pthread_kern->thread_set_workq_pri(th, (pri & (~_PTHREAD_PRIORITY_FLAGS_MASK)), POLICY_TIMESHARE); - assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); - - tl->th_flags |= TH_LIST_EVENT_MGR_SCHED_PRI; - } -} - -/* - * Picks the best request to run, and returns the best overcommit fallback - * if the best pick is non overcommit and risks failing its admission check. 
- */ -static struct threadreq * -workqueue_best_threadreqs(struct workqueue *wq, struct threadlist *tl, - struct threadreq **fallback) -{ - struct threadreq *req, *best_req = NULL; - int priclass, prilimit; - - if ((wq->wq_event_manager_threadreq.tr_state == TR_STATE_WAITING) && - ((wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0) || - (tl && tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) { - /* - * There's an event manager request and either: - * - no event manager currently running - * - we are re-using the event manager - */ - req = &wq->wq_event_manager_threadreq; - PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, req, 1, 0, 0); - return req; - } - - if (tl) { - prilimit = WORKQUEUE_EVENT_MANAGER_BUCKET; - } else { - prilimit = _wq_highest_paced_priority(wq); - } - for (priclass = 0; priclass < prilimit; priclass++) { - req = TAILQ_FIRST(&wq->wq_overcommit_reqlist[priclass]); - if (req) { - PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, req, 2, 0, 0); - if (best_req) { - *fallback = req; - } else { - best_req = req; - } - break; - } - if (!best_req) { - best_req = TAILQ_FIRST(&wq->wq_reqlist[priclass]); - if (best_req) { - PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, best_req, 3, 0, 0); - } - } - } - return best_req; -} - -/** - * Runs a thread request on a thread - * - * - if thread is THREAD_NULL, will find a thread and run the request there. - * Otherwise, the thread must be the current thread. - * - * - if req is NULL, will find the highest priority request and run that. If - * it is not NULL, it must be a threadreq object in state NEW. If it can not - * be run immediately, it will be enqueued and moved to state WAITING. - * - * Either way, the thread request object serviced will be moved to state - * PENDING and attached to the threadlist. - * - * Should be called with the workqueue lock held. Will drop it. - * - * WARNING: _workq_kevent_reqthreads needs to be able to preflight any - * admission checks in this function. If you are changing this function, - * keep that one up-to-date. - * - * - if parking_tl is non NULL, then the current thread is parking. This will - * try to reuse this thread for a request. If no match is found, it will be - * parked. - */ -static int -workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq, - struct threadlist *parking_tl, struct threadreq *req, - bool may_add_new_thread) -{ - struct threadreq *incoming_req = req; - - struct threadlist *tl = parking_tl; - int rc = WQ_RUN_TR_THROTTLED; - - assert(tl == NULL || tl->th_thread == current_thread()); - assert(req == NULL || req->tr_state == TR_STATE_NEW); - assert(!may_add_new_thread || !tl); - - PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq | DBG_FUNC_START, wq, req, - tl ? thread_tid(tl->th_thread) : 0, - req ? (req->tr_priority << 16 | req->tr_flags) : 0, 0); - - /* - * Special cases when provided an event manager request - */ - if (req && req->tr_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { - // Clients must not rely on identity of event manager requests - assert(req->tr_flags & TR_FLAG_ONSTACK); - // You can't be both overcommit and event manager - assert((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0); - - /* - * We can only ever have one event manager request, so coalesce them if - * there's already one outstanding. 
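The request lifecycle spelled out in the comment above (NEW on entry, WAITING once enqueued, PENDING once handed to a thread) is easy to lose in the branches that follow; a toy model of the progression, with names that only mirror the TR_STATE_* values used here:

#include <stdbool.h>

/* Toy model of a thread request's life as documented above:
 *   NEW      - caller-owned, not yet visible to the workqueue
 *   WAITING  - could not run immediately, enqueued on wq_reqlist/wq_overcommit_reqlist
 *   PENDING  - selected by workqueue_run_threadreq_and_unlock(), attached to a threadlist
 */
enum tr_state_toy { TR_TOY_NEW, TR_TOY_WAITING, TR_TOY_PENDING };

static enum tr_state_toy
tr_toy_next_state(enum tr_state_toy cur, bool can_run_now)
{
	switch (cur) {
	case TR_TOY_NEW:
	case TR_TOY_WAITING:
		return can_run_now ? TR_TOY_PENDING : TR_TOY_WAITING;
	default:
		return cur;     /* PENDING requests are completed, never re-queued */
	}
}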
- */ - if (wq->wq_event_manager_threadreq.tr_state == TR_STATE_WAITING) { - PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_mgr_merge | DBG_FUNC_NONE, wq, req, 0, 0, 0); - - struct threadreq *existing_req = &wq->wq_event_manager_threadreq; - if (req->tr_flags & TR_FLAG_KEVENT) { - existing_req->tr_flags |= TR_FLAG_KEVENT; - } - - req = existing_req; - incoming_req = NULL; - } - - if (wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] && - (!tl || tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET)){ - /* - * There can only be one event manager running at a time. - */ - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 1, 0, 0, 0); - goto done; - } - } - -again: // Start again after creating a thread - - if (_wq_exiting(wq)) { - rc = WQ_RUN_TR_EXITING; - goto exiting; - } - - /* - * Thread request selection and admission control - */ - struct threadreq *fallback = NULL; - if (req) { - if ((req->tr_flags & TR_FLAG_NO_PACING) == 0 && - _wq_should_pace_priority(wq, req->tr_priority)) { - /* - * If a request fails the pacing admission check, then thread - * requests are redriven when the pacing thread is finally scheduled - * when it calls _wq_pacing_end() in wq_unpark_continue(). - */ - goto done; - } - } else if (wq->wq_reqcount == 0) { - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 2, 0, 0, 0); - goto done; - } else if ((req = workqueue_best_threadreqs(wq, tl, &fallback)) == NULL) { - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 3, 0, 0, 0); - goto done; - } - - if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0 && - (req->tr_priority < WORKQUEUE_EVENT_MANAGER_BUCKET)) { - if (!may_start_constrained_thread(wq, req->tr_priority, parking_tl, true)) { - if (!fallback) { - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 4, 0, 0, 0); - goto done; - } - assert(req->tr_state == TR_STATE_WAITING); - req = fallback; - } - } - - /* - * Thread selection. - */ - if (parking_tl) { - if (tl->th_priority != req->tr_priority) { - _wq_thactive_move(wq, tl->th_priority, req->tr_priority); - wq->wq_thscheduled_count[tl->th_priority]--; - wq->wq_thscheduled_count[req->tr_priority]++; - } - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE, - wq, 1, thread_tid(tl->th_thread), 0, 0); - } else if (wq->wq_thidlecount) { - tl = pop_from_thidlelist(wq, req->tr_priority); - /* - * This call will update wq_thscheduled_count and wq_thactive_count for - * the provided priority. It will not set the returned thread to that - * priority. This matches the behavior of the parking_tl clause above. - */ - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE, - wq, 2, thread_tid(tl->th_thread), 0, 0); - } else /* no idle threads */ { - if (!may_add_new_thread || wq->wq_nthreads >= wq_max_threads) { - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 5, - may_add_new_thread, wq->wq_nthreads, 0); - if (wq->wq_nthreads < wq_max_threads) { - rc = WQ_RUN_TR_THREAD_NEEDED; - } - goto done; - } - - bool added_thread = workqueue_addnewthread(p, wq); - /* - * workqueue_addnewthread will drop and re-take the lock, so we - * need to ensure we still have a cached request. - * - * It also means we have to pick a new request, since our old pick may - * not be valid anymore. 
- */ - req = incoming_req; - if (req && (req->tr_flags & TR_FLAG_ONSTACK)) { - _threadreq_copy_prepare(wq); - } - - if (added_thread) { - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE, - wq, 3, 0, 0, 0); - goto again; - } else if (_wq_exiting(wq)) { - rc = WQ_RUN_TR_EXITING; - goto exiting; - } else { - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 6, 0, 0, 0); - /* - * Something caused thread creation to fail. Kick off the timer in - * the hope that it'll succeed next time. - */ - if (WQ_TIMER_DELAYED_NEEDED(wq)) { - workqueue_interval_timer_start(wq); - } - goto done; - } - } - - /* - * Setup thread, mark request as complete and run with it. - */ - if (req->tr_state == TR_STATE_WAITING) { - _threadreq_dequeue(wq, req); - } - if (tl->th_priority != req->tr_priority) { - PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE, - wq, thread_tid(tl->th_thread), - (tl->th_priority << 16) | req->tr_priority, 1, 0); - reset_priority(tl, pthread_priority_from_wq_class_index(wq, req->tr_priority)); - tl->th_priority = (uint8_t)req->tr_priority; - } - if (req->tr_flags & TR_FLAG_OVERCOMMIT) { - if ((tl->th_flags & TH_LIST_CONSTRAINED) != 0) { - tl->th_flags &= ~TH_LIST_CONSTRAINED; - wq->wq_constrained_threads_scheduled--; - } - } else { - if ((tl->th_flags & TH_LIST_CONSTRAINED) == 0) { - tl->th_flags |= TH_LIST_CONSTRAINED; - wq->wq_constrained_threads_scheduled++; - } - } - - if (!parking_tl && !(req->tr_flags & TR_FLAG_NO_PACING)) { - _wq_pacing_start(wq, tl); - } - if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0) { - uint32_t old_qos, new_qos; - - /* - * If we are scheduling a constrained thread request, we may need to - * update the best constrained qos in the thactive atomic state. - */ - for (new_qos = 0; new_qos < WQ_THACTIVE_NO_PENDING_REQUEST; new_qos++) { - if (TAILQ_FIRST(&wq->wq_reqlist[new_qos])) - break; - } - old_qos = _wq_thactive_best_constrained_req_qos(wq); - if (old_qos != new_qos) { - wq_thactive_t v = _wq_thactive_set_best_constrained_req_qos(wq, - old_qos, new_qos); -#ifdef __LP64__ - PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 2, (uint64_t)v, - (uint64_t)(v >> 64), 0, 0); -#else - PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 2, v, 0, 0, 0); -#endif - } - } - { - uint32_t upcall_flags = WQ_FLAG_THREAD_NEWSPI; - if (req->tr_flags & TR_FLAG_OVERCOMMIT) - upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; - if (req->tr_flags & TR_FLAG_KEVENT) - upcall_flags |= WQ_FLAG_THREAD_KEVENT; - if (req->tr_flags & TR_FLAG_WORKLOOP) - upcall_flags |= WQ_FLAG_THREAD_WORKLOOP | WQ_FLAG_THREAD_KEVENT; - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) - upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER; - tl->th_upcall_flags = upcall_flags >> WQ_FLAG_THREAD_PRIOSHIFT; - } - if (req->tr_flags & TR_FLAG_KEVENT) { - tl->th_flags |= TH_LIST_KEVENT; - } else { - tl->th_flags &= ~TH_LIST_KEVENT; - } - return _threadreq_complete_and_unlock(p, wq, req, tl); - -done: - if (incoming_req) { - _threadreq_enqueue(wq, incoming_req); - } - -exiting: - - if (parking_tl && !(parking_tl->th_flags & TH_LIST_UNBINDING)) { - parkit(wq, parking_tl, parking_tl->th_thread); - __builtin_unreachable(); - } - - workqueue_unlock(wq); - - return rc; -} - -/** - * parked thread wakes up - */ -static void __dead2 -wq_unpark_continue(void* __unused ptr, wait_result_t wait_result) -{ - boolean_t first_use = false; - thread_t th = current_thread(); - proc_t p = current_proc(); - - struct uthread *uth = pthread_kern->get_bsdthread_info(th); - if (uth == NULL) goto done; - - struct workqueue 
*wq = pthread_kern->proc_get_wqptr(p); - if (wq == NULL) goto done; - - workqueue_lock_spin(wq); - - struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); - assert(tl != WQ_THREADLIST_EXITING_POISON); - if (tl == NULL) { - /* - * We woke up before addnewthread() was finished setting us up. Go - * ahead and exit, but before we do poison the threadlist variable so - * that addnewthread() doesn't think we are valid still. - */ - pthread_kern->uthread_set_threadlist(uth, WQ_THREADLIST_EXITING_POISON); - workqueue_unlock(wq); - goto done; - } - - assert(tl->th_flags & TH_LIST_INITED); - - if ((tl->th_flags & TH_LIST_NEW)){ - tl->th_flags &= ~(TH_LIST_NEW); - first_use = true; - } - - if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) { - /* - * The normal wakeup path. - */ - goto return_to_user; - } - - if ((tl->th_flags & TH_LIST_RUNNING) == 0 && - wait_result == THREAD_TIMED_OUT && - tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET && - TAILQ_FIRST(&wq->wq_thidlemgrlist) == tl && - TAILQ_NEXT(tl, th_entry) == NULL){ - /* - * If we are the only idle manager and we pop'ed for self-destruction, - * then don't actually exit. Instead, free our stack to save some - * memory and re-park. - */ - - workqueue_unlock(wq); - - vm_map_t vmap = wq->wq_map; - - // Keep this in sync with _setup_wqthread() - const vm_size_t guardsize = vm_map_page_size(vmap); - const user_addr_t freeaddr = (user_addr_t)tl->th_stackaddr + guardsize; - const vm_map_offset_t freesize = vm_map_trunc_page_mask((PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1, vm_map_page_mask(vmap)) - guardsize; - - __assert_only int kr = mach_vm_behavior_set(vmap, freeaddr, freesize, VM_BEHAVIOR_REUSABLE); -#if MACH_ASSERT - if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) { - os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr); - } -#endif - - workqueue_lock_spin(wq); - - if ( !(tl->th_flags & TH_LIST_RUNNING)) { - thread_set_pending_block_hint(th, kThreadWaitParkedWorkQueue); - assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE)); - - workqueue_unlock(wq); - - thread_block(wq_unpark_continue); - __builtin_unreachable(); - } - } - - if ((tl->th_flags & TH_LIST_RUNNING) == 0) { - assert((tl->th_flags & TH_LIST_BUSY) == 0); - if (!first_use) { - PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, 0, 0, 0, 0); - } - /* - * We were set running, but not for the purposes of actually running. - * This could be because the timer elapsed. Or it could be because the - * thread aborted. Either way, we need to return to userspace to exit. - * - * The call to workqueue_removethread will consume the lock. - */ - - if (!first_use && - (tl->th_priority < qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS) || - (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) { - // Reset the QoS to something low for the pthread cleanup - PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE, - wq, thread_tid(th), - (tl->th_priority << 16) | qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS), 3, 0); - pthread_priority_t cleanup_pri = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0); - reset_priority(tl, cleanup_pri); - } - - workqueue_removethread(tl, 0, first_use); - - if (first_use){ - pthread_kern->thread_bootstrap_return(); - } else { - pthread_kern->unix_syscall_return(0); - } - __builtin_unreachable(); - } - - /* - * The timer woke us up or the thread was aborted. However, we have - * already started to make this a runnable thread. 
Wait for that to - * finish, then continue to userspace. - */ - while ((tl->th_flags & TH_LIST_BUSY)) { - assert_wait((caddr_t)tl, (THREAD_UNINT)); - - workqueue_unlock(wq); - - thread_block(THREAD_CONTINUE_NULL); - - workqueue_lock_spin(wq); - } - -return_to_user: - if (!first_use) { - PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, 0, 0, 0, 0); - } - if (_wq_pacing_end(wq, tl) && wq->wq_reqcount) { - workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true); - } else { - workqueue_unlock(wq); - } - _setup_wqthread(p, th, wq, tl, first_use ? WQ_SETUP_FIRST_USE : 0); - pthread_kern->thread_sched_call(th, workqueue_callback); -done: - if (first_use){ - pthread_kern->thread_bootstrap_return(); - } else { - pthread_kern->unix_syscall_return(EJUSTRETURN); - } - panic("Our attempt to return to userspace failed..."); -} + if (kevent_data_available == WQ_KEVENT_DATA_SIZE) { + workq_thread_set_top_addr(th_addrs, kevent_id_addr); + } else { + workq_thread_set_top_addr(th_addrs, + kevent_data_buf + kevent_data_available); + } + *kevent_count_out = kevent_count; + *kevent_list_out = kevent_list; + return ret; +} /** * configures initial thread stack/registers to jump into: @@ -3787,282 +840,90 @@ done: * |guard page | guardsize * |-----------| th_stackaddr */ +__attribute__((noreturn,noinline)) void -_setup_wqthread(proc_t p, thread_t th, struct workqueue *wq, - struct threadlist *tl, int setup_flags) +workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr, + mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags) { - int error; - if (setup_flags & WQ_SETUP_CLEAR_VOUCHER) { - /* - * For preemption reasons, we want to reset the voucher as late as - * possible, so we do it in two places: - * - Just before parking (i.e. in parkit()) - * - Prior to doing the setup for the next workitem (i.e. here) - * - * Those two places are sufficient to ensure we always reset it before - * it goes back out to user space, but be careful to not break that - * guarantee. - */ - __assert_only kern_return_t kr; - kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL); - assert(kr == KERN_SUCCESS); - } - - uint32_t upcall_flags = tl->th_upcall_flags << WQ_FLAG_THREAD_PRIOSHIFT; - if (!(setup_flags & WQ_SETUP_FIRST_USE)) { - upcall_flags |= WQ_FLAG_THREAD_REUSE; - } - - /* - * Put the QoS class value into the lower bits of the reuse_thread register, this is where - * the thread priority used to be stored anyway. - */ - pthread_priority_t priority = pthread_priority_from_wq_class_index(wq, tl->th_priority); - upcall_flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK); - - const vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map); - const vm_size_t stack_gap_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_REDZONE_LEN; - const vm_size_t stack_align_min = (proc_is64bit(p) == 0) ? 
C_32_STK_ALIGN : C_64_STK_ALIGN; - - user_addr_t pthread_self_addr = (user_addr_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET); - user_addr_t stack_top_addr = (user_addr_t)((pthread_self_addr - stack_gap_min) & -stack_align_min); - user_addr_t stack_bottom_addr = (user_addr_t)(tl->th_stackaddr + guardsize); + struct workq_thread_addrs th_addrs; + bool first_use = (setup_flags & WQ_SETUP_FIRST_USE); + user_addr_t kevent_list = NULL; + int kevent_count = 0; - user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p); - if (!wqstart_fnptr) { - panic("workqueue thread start function pointer is NULL"); - } + workq_thread_get_addrs(map, stackaddr, &th_addrs); - if (setup_flags & WQ_SETUP_FIRST_USE) { + if (first_use) { uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p); if (tsd_offset) { - mach_vm_offset_t th_tsd_base = (mach_vm_offset_t)pthread_self_addr + tsd_offset; - kern_return_t kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base); + mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset; + kern_return_t kret = pthread_kern->thread_set_tsd_base(th, + th_tsd_base); if (kret == KERN_SUCCESS) { upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET; } } /* - * Pre-fault the first page of the new thread's stack and the page that will - * contain the pthread_t structure. - */ - vm_map_t vmap = pthread_kern->current_map(); - if (vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) != - vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap))){ - vm_fault( vmap, - vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); + * Pre-fault the first page of the new thread's stack and the page that will + * contain the pthread_t structure. + */ + vm_map_offset_t mask = vm_map_page_mask(map); + vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask); + vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask); + if (th_page != stk_page) { + vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0); } - vm_fault( vmap, - vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap)), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); + vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0); } - user_addr_t kevent_list = NULL; - int kevent_count = 0; - if (upcall_flags & WQ_FLAG_THREAD_KEVENT){ - bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP; - - kevent_list = pthread_self_addr - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s); - kevent_count = WQ_KEVENT_LIST_LEN; - - user_addr_t kevent_id_addr = kevent_list; - if (workloop) { - /* - * The kevent ID goes just below the kevent list. Sufficiently new - * userspace will know to look there. Old userspace will just - * ignore it. 
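A user-space sketch of the address arithmetic above, which carves the wqthread allocation into guard page, stack, pthread_t, and kevent scratch space. All sizes here are placeholders (the kernel uses vm_map_page_size(), PTH_DEFAULT_STACKSIZE, and the WQ_KEVENT_* constants); only the layout and the align-down step are the point:

#include <stdint.h>
#include <stdio.h>

/* placeholder sizes, not the kernel's */
#define GUARD_SIZE        0x4000ull
#define STACK_SIZE        (512 * 1024ull)
#define PTHREAD_T_OFFSET  0ull
#define KEVENT_LIST_BYTES 1024ull      /* stands in for WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s) */
#define KEVENT_DATA_SIZE  (32 * 1024ull)
#define REDZONE           128ull       /* C_64_REDZONE_LEN */
#define STK_ALIGN         16ull        /* C_64_STK_ALIGN */

int
main(void)
{
	uint64_t stackaddr = 0x700000000000ull;    /* hypothetical allocation base */
	uint64_t self      = stackaddr + STACK_SIZE + GUARD_SIZE + PTHREAD_T_OFFSET;
	uint64_t stack_top = (self - REDZONE) & ~(STK_ALIGN - 1);  /* gap below pthread_t, aligned down */
	uint64_t stack_bot = stackaddr + GUARD_SIZE;
	uint64_t kev_list  = self - KEVENT_LIST_BYTES;    /* kevent list sits just below the pthread_t */
	uint64_t kev_data  = kev_list - KEVENT_DATA_SIZE; /* stack-data buffer goes below that */

	printf("stack %#llx..%#llx  pthread_t %#llx  kevents %#llx/%#llx\n",
	    (unsigned long long)stack_bot, (unsigned long long)stack_top,
	    (unsigned long long)self, (unsigned long long)kev_list,
	    (unsigned long long)kev_data);
	return 0;
}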
- */ - kevent_id_addr -= sizeof(kqueue_id_t); - } - - user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE; - user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE; - - int32_t events_out = 0; - - assert(tl->th_flags | TH_LIST_KEVENT_BOUND); + if (setup_flags & WQ_SETUP_EXIT_THREAD) { + kevent_count = WORKQ_EXIT_THREAD_NKEVENT; + } else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) { unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE; - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { - flags |= KEVENT_FLAG_WORKQ_MANAGER; - } - int ret = 0; - if (workloop) { - flags |= KEVENT_FLAG_WORKLOOP; - kqueue_id_t kevent_id = -1; - ret = kevent_id_internal(p, &kevent_id, - NULL, 0, kevent_list, kevent_count, - kevent_data_buf, &kevent_data_available, - flags, &events_out); - copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id)); - } else { - flags |= KEVENT_FLAG_WORKQ; - ret = kevent_qos_internal(p, - class_index_get_thread_qos(tl->th_priority), - NULL, 0, kevent_list, kevent_count, - kevent_data_buf, &kevent_data_available, - flags, &events_out); - } - - // squash any errors into just empty output - if (ret != KERN_SUCCESS || events_out == -1){ - events_out = 0; - kevent_data_available = WQ_KEVENT_DATA_SIZE; - } - - // We shouldn't get data out if there aren't events available - assert(events_out != 0 || kevent_data_available == WQ_KEVENT_DATA_SIZE); - - if (events_out > 0){ - if (kevent_data_available == WQ_KEVENT_DATA_SIZE){ - stack_top_addr = (kevent_id_addr - stack_gap_min) & -stack_align_min; - } else { - stack_top_addr = (kevent_data_buf + kevent_data_available - stack_gap_min) & -stack_align_min; - } - - kevent_count = events_out; - } else { - kevent_list = NULL; - kevent_count = 0; - } + workq_kevent(p, &th_addrs, upcall_flags, NULL, 0, flags, + &kevent_list, &kevent_count); } - PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, 0, 0, 0, 0); - -#if defined(__i386__) || defined(__x86_64__) - if (proc_is64bit(p) == 0) { - x86_thread_state32_t state = { - .eip = (unsigned int)wqstart_fnptr, - .eax = /* arg0 */ (unsigned int)pthread_self_addr, - .ebx = /* arg1 */ (unsigned int)tl->th_thport, - .ecx = /* arg2 */ (unsigned int)stack_bottom_addr, - .edx = /* arg3 */ (unsigned int)kevent_list, - .edi = /* arg4 */ (unsigned int)upcall_flags, - .esi = /* arg5 */ (unsigned int)kevent_count, - - .esp = (int)((vm_offset_t)stack_top_addr), - }; + workq_set_register_state(p, th, &th_addrs, kport, + kevent_list, upcall_flags, kevent_count); - error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); - if (error != KERN_SUCCESS) { - panic(__func__ ": thread_set_wq_state failed: %d", error); - } + if (first_use) { + pthread_kern->thread_bootstrap_return(); } else { - x86_thread_state64_t state64 = { - // x86-64 already passes all the arguments in registers, so we just put them in their final place here - .rip = (uint64_t)wqstart_fnptr, - .rdi = (uint64_t)pthread_self_addr, - .rsi = (uint64_t)tl->th_thport, - .rdx = (uint64_t)stack_bottom_addr, - .rcx = (uint64_t)kevent_list, - .r8 = (uint64_t)upcall_flags, - .r9 = (uint64_t)kevent_count, - - .rsp = (uint64_t)(stack_top_addr) - }; - - error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64); - if (error != KERN_SUCCESS) { - panic(__func__ ": thread_set_wq_state failed: %d", error); - } + pthread_kern->unix_syscall_return(EJUSTRETURN); } -#else -#error setup_wqthread not defined for this architecture -#endif -} - -#if DEBUG -static int wq_kevent_test SYSCTL_HANDLER_ARGS { - //(struct 
sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) -#pragma unused(oidp, arg1, arg2) - int error; - struct workq_reqthreads_req_s requests[64] = {}; - - if (req->newlen > sizeof(requests) || req->newlen < sizeof(struct workq_reqthreads_req_s)) - return EINVAL; - - error = copyin(req->newptr, requests, req->newlen); - if (error) return error; - - _workq_reqthreads(req->p, (int)(req->newlen / sizeof(struct workq_reqthreads_req_s)), requests); - - return 0; + __builtin_unreachable(); } -#endif // DEBUG - -#pragma mark - Misc int -_fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo) +workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map, + user_addr_t stackaddr, mach_port_name_t kport, + user_addr_t events, int nevents, int upcall_flags) { - struct workqueue * wq; - int error = 0; - int activecount; - - if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) { - return EINVAL; - } - - /* - * This is sometimes called from interrupt context by the kperf sampler. - * In that case, it's not safe to spin trying to take the lock since we - * might already hold it. So, we just try-lock it and error out if it's - * already held. Since this is just a debugging aid, and all our callers - * are able to handle an error, that's fine. - */ - bool locked = workqueue_lock_try(wq); - if (!locked) { - return EBUSY; - } - - activecount = _wq_thactive_aggregate_downto_qos(wq, _wq_thactive(wq), - WORKQUEUE_NUM_BUCKETS - 1, NULL, NULL); - pwqinfo->pwq_nthreads = wq->wq_nthreads; - pwqinfo->pwq_runthreads = activecount; - pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount; - pwqinfo->pwq_state = 0; - - if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { - pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT; - } - - if (wq->wq_nthreads >= wq_max_threads) { - pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT; - } - - workqueue_unlock(wq); - return(error); -} + struct workq_thread_addrs th_addrs; + user_addr_t kevent_list = NULL; + int kevent_count = 0, error; + __assert_only kern_return_t kr; -uint32_t -_get_pwq_state_kdp(proc_t p) -{ - if (p == NULL) { - return 0; - } + workq_thread_get_addrs(map, stackaddr, &th_addrs); - struct workqueue *wq = pthread_kern->proc_get_wqptr(p); + unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE | + KEVENT_FLAG_PARKING; + error = workq_kevent(p, &th_addrs, upcall_flags, events, nevents, flags, + &kevent_list, &kevent_count); - if (wq == NULL || workqueue_lock_spin_is_acquired_kdp(wq)) { - return 0; + if (error || kevent_count == 0) { + return error; } - uint32_t pwq_state = WQ_FLAGS_AVAILABLE; - - if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { - pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT; - } + kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL); + assert(kr == KERN_SUCCESS); - if (wq->wq_nthreads >= wq_max_threads) { - pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT; - } + workq_set_register_state(p, th, &th_addrs, kport, + kevent_list, upcall_flags, kevent_count); - return pwq_state; + pthread_kern->unix_syscall_return(EJUSTRETURN); + __builtin_unreachable(); } int @@ -4083,44 +944,16 @@ _pthread_init(void) * allocate the lock attribute for pthread synchronizers */ pthread_lck_attr = lck_attr_alloc_init(); - pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr); pth_global_hashinit(); psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL); psynch_zoneinit(); - pthread_zone_workqueue = zinit(sizeof(struct workqueue), - 
1024 * sizeof(struct workqueue), 8192, "pthread.workqueue"); - pthread_zone_threadlist = zinit(sizeof(struct threadlist), - 1024 * sizeof(struct threadlist), 8192, "pthread.threadlist"); - pthread_zone_threadreq = zinit(sizeof(struct threadreq), - 1024 * sizeof(struct threadreq), 8192, "pthread.threadreq"); - int policy_bootarg; if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) { pthread_mutex_default_policy = policy_bootarg; } - /* - * register sysctls - */ - sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs); - sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs); - sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs); - sysctl_register_oid(&sysctl__kern_wq_max_threads); - sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads); - sysctl_register_oid(&sysctl__kern_pthread_debug_tracing); sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy); - -#if DEBUG - sysctl_register_oid(&sysctl__debug_wq_kevent_test); -#endif - - for (int i = 0; i < WORKQUEUE_NUM_BUCKETS; i++) { - uint32_t thread_qos = _wq_bucket_to_thread_qos(i); - wq_max_concurrency[i] = pthread_kern->qos_max_parallelism(thread_qos, - QOS_PARALLELISM_COUNT_LOGICAL); - } - wq_max_concurrency[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1; } diff --git a/kern/kern_synch.c b/kern/kern_synch.c index 217ddcb..7dabe41 100644 --- a/kern/kern_synch.c +++ b/kern/kern_synch.c @@ -69,6 +69,7 @@ #include #include #include +#include //#include #include #include @@ -82,7 +83,6 @@ #include #include -#include #include "kern_internal.h" #include "synch_internal.h" @@ -92,9 +92,7 @@ typedef struct uthread *uthread_t; //#define __FAILEDUSERTEST__(s) do { panic(s); } while (0) #define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0) - -#define ECVCERORR 256 -#define ECVPERORR 512 +#define __FAILEDUSERTEST2__(s, x...) 
do { printf("PSYNCH: pid[%d]: " s "\n", proc_pid(current_proc()), x); } while (0) lck_mtx_t *pthread_list_mlock; @@ -119,17 +117,23 @@ struct ksyn_queue { }; typedef struct ksyn_queue *ksyn_queue_t; -enum { +typedef enum { KSYN_QUEUE_READ = 0, - KSYN_QUEUE_WRITER, + KSYN_QUEUE_WRITE, KSYN_QUEUE_MAX, -}; +} kwq_queue_type_t; + +typedef enum { + KWQ_INTR_NONE = 0, + KWQ_INTR_READ = 0x1, + KWQ_INTR_WRITE = 0x2, +} kwq_intr_type_t; struct ksyn_wait_queue { LIST_ENTRY(ksyn_wait_queue) kw_hash; LIST_ENTRY(ksyn_wait_queue) kw_list; user_addr_t kw_addr; - uint64_t kw_owner; + thread_t kw_owner; /* current owner or THREAD_NULL, has a +1 */ uint64_t kw_object; /* object backing in shared mode */ uint64_t kw_offset; /* offset inside the object in shared mode */ int kw_pflags; /* flags under listlock protection */ @@ -151,19 +155,23 @@ struct ksyn_wait_queue { uint32_t kw_lastseqword; /* the last seq that unlocked */ /* for mutex and cvar we need to track I bit values */ uint32_t kw_nextseqword; /* the last seq that unlocked; with num of waiters */ - uint32_t kw_overlapwatch; /* chance for overlaps */ - uint32_t kw_pre_rwwc; /* prepost count */ - uint32_t kw_pre_lockseq; /* prepost target seq */ - uint32_t kw_pre_sseq; /* prepost target sword, in cvar used for mutexowned */ - uint32_t kw_pre_intrcount; /* prepost of missed wakeup due to intrs */ - uint32_t kw_pre_intrseq; /* prepost of missed wakeup limit seq */ - uint32_t kw_pre_intrretbits; /* return bits value for missed wakeup threads */ - uint32_t kw_pre_intrtype; /* type of failed wakueps*/ + struct { + uint32_t count; /* prepost count */ + uint32_t lseq; /* prepost target seq */ + uint32_t sseq; /* prepost target sword, in cvar used for mutexowned */ + } kw_prepost; + struct { + kwq_intr_type_t type; /* type of failed wakueps */ + uint32_t count; /* prepost of missed wakeup due to intrs */ + uint32_t seq; /* prepost of missed wakeup limit seq */ + uint32_t returnbits; /* return bits value for missed wakeup threads */ + } kw_intr; int kw_kflags; int kw_qos_override; /* QoS of max waiter during contention period */ + struct turnstile *kw_turnstile; struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX]; /* queues to hold threads */ - lck_mtx_t kw_lock; /* mutex lock protecting this structure */ + lck_spin_t kw_lock; /* spinlock protecting this structure */ }; typedef struct ksyn_wait_queue * ksyn_wait_queue_t; @@ -189,14 +197,9 @@ typedef struct ksyn_wait_queue * ksyn_wait_queue_t; /* * Mutex policy attributes */ -#define _PTHREAD_MUTEX_POLICY_NONE 0 -#define _PTHREAD_MUTEX_POLICY_FAIRSHARE 0x040 /* 1 */ -#define _PTHREAD_MUTEX_POLICY_FIRSTFIT 0x080 /* 2 */ -#define _PTHREAD_MUTEX_POLICY_REALTIME 0x0c0 /* 3 */ -#define _PTHREAD_MUTEX_POLICY_ADAPTIVE 0x100 /* 4 */ -#define _PTHREAD_MUTEX_POLICY_PRIPROTECT 0x140 /* 5 */ -#define _PTHREAD_MUTEX_POLICY_PRIINHERIT 0x180 /* 6 */ -#define PTHREAD_POLICY_FLAGS_MASK 0x1c0 +#define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE 0x040 /* 1 */ +#define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT 0x080 /* 2 */ +#define _PTHREAD_MTX_OPT_POLICY_MASK 0x1c0 /* pflags */ #define KSYN_WQ_INHASH 2 @@ -205,9 +208,10 @@ typedef struct ksyn_wait_queue * ksyn_wait_queue_t; #define KSYN_WQ_FLIST 0X10 /* in free list to be freed after a short delay */ /* kflags */ -#define KSYN_KWF_INITCLEARED 1 /* the init status found and preposts cleared */ -#define KSYN_KWF_ZEROEDOUT 2 /* the lword, etc are inited to 0 */ -#define KSYN_KWF_QOS_APPLIED 4 /* QoS override applied to owner */ +#define KSYN_KWF_INITCLEARED 0x1 /* the init status found and preposts 
cleared */ +#define KSYN_KWF_ZEROEDOUT 0x2 /* the lword, etc are inited to 0 */ +#define KSYN_KWF_QOS_APPLIED 0x4 /* QoS override applied to owner */ +#define KSYN_KWF_OVERLAP_GUARD 0x8 /* overlap guard */ #define KSYN_CLEANUP_DEADLINE 10 static int psynch_cleanupset; @@ -223,47 +227,24 @@ thread_call_t psynch_thcall; #define KSYN_WQTYPE_MUTEXDROP (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX) -#define KW_UNLOCK_PREPOST 0x01 -#define KW_UNLOCK_PREPOST_READLOCK 0x08 -#define KW_UNLOCK_PREPOST_WRLOCK 0x20 - -static void -CLEAR_PREPOST_BITS(ksyn_wait_queue_t kwq) +static inline int +_kwq_type(ksyn_wait_queue_t kwq) { - kwq->kw_pre_lockseq = 0; - kwq->kw_pre_sseq = PTHRW_RWS_INIT; - kwq->kw_pre_rwwc = 0; + return (kwq->kw_type & KSYN_WQTYPE_MASK); } -static void -CLEAR_INTR_PREPOST_BITS(ksyn_wait_queue_t kwq) +static inline bool +_kwq_use_turnstile(ksyn_wait_queue_t kwq) { - kwq->kw_pre_intrcount = 0; - kwq->kw_pre_intrseq = 0; - kwq->kw_pre_intrretbits = 0; - kwq->kw_pre_intrtype = 0; + // If we had writer-owner information from the + // rwlock then we could use the turnstile to push on it. For now, only + // plain mutexes use it. + return (_kwq_type(kwq) == KSYN_WQTYPE_MTX); } -static void -CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq) -{ - if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) { - if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) { - panic("CV:entries in queue durinmg reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount); - } - }; - if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) { - kwq->kw_nextseqword = PTHRW_RWS_INIT; - kwq->kw_overlapwatch = 0; - }; - CLEAR_PREPOST_BITS(kwq); - kwq->kw_lastunlockseq = PTHRW_RWL_INIT; - kwq->kw_lastseqword = PTHRW_RWS_INIT; - CLEAR_INTR_PREPOST_BITS(kwq); - kwq->kw_lword = 0; - kwq->kw_uword = 0; - kwq->kw_sword = PTHRW_RWS_INIT; -} +#define KW_UNLOCK_PREPOST 0x01 +#define KW_UNLOCK_PREPOST_READLOCK 0x08 +#define KW_UNLOCK_PREPOST_WRLOCK 0x20 static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t *object, uint64_t *offset); static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype , ksyn_wait_queue_t *wq); @@ -272,13 +253,11 @@ static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp) static int _wait_result_to_errno(wait_result_t result); -static int ksyn_wait(ksyn_wait_queue_t, int, uint32_t, int, uint64_t, thread_continue_t, block_hint_t); -static kern_return_t ksyn_signal(ksyn_wait_queue_t, int, ksyn_waitq_element_t, uint32_t); +static int ksyn_wait(ksyn_wait_queue_t, kwq_queue_type_t, uint32_t, int, uint64_t, uint16_t, thread_continue_t, block_hint_t); +static kern_return_t ksyn_signal(ksyn_wait_queue_t, kwq_queue_type_t, ksyn_waitq_element_t, uint32_t); static void ksyn_freeallkwe(ksyn_queue_t kq); -static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t); -static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t, uint64_t tid, boolean_t prepost); -static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t); +static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t, thread_t *); static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen); @@ -299,8 +278,10 @@ static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep); 
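A small sketch of how the renamed policy bits above are consumed: user space packs one of the _PTHREAD_MTX_OPT_POLICY_* values into the flags word it hands to psynch_mutexwait/psynch_mutexdrop, and the kernel only distinguishes first-fit from the default fair-share behaviour:

#include <stdbool.h>
#include <stdint.h>

#define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE 0x040
#define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT  0x080
#define _PTHREAD_MTX_OPT_POLICY_MASK      0x1c0

/* the same test _psynch_mutexwait() and _psynch_mutexdrop_internal() apply to their flags */
static bool
mutex_is_firstfit(uint32_t flags)
{
	return (flags & _PTHREAD_MTX_OPT_POLICY_MASK) == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
}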
static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq); -static void psynch_cvcontinue(void *, wait_result_t); -static void psynch_mtxcontinue(void *, wait_result_t); +static void __dead2 psynch_cvcontinue(void *, wait_result_t); +static void __dead2 psynch_mtxcontinue(void *, wait_result_t); +static void __dead2 psynch_rw_rdcontinue(void *, wait_result_t); +static void __dead2 psynch_rw_wrcontinue(void *, wait_result_t); static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp); static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]); @@ -335,6 +316,196 @@ UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc } } +static inline void +_kwq_clear_preposted_wakeup(ksyn_wait_queue_t kwq) +{ + kwq->kw_prepost.lseq = 0; + kwq->kw_prepost.sseq = PTHRW_RWS_INIT; + kwq->kw_prepost.count = 0; +} + +static inline void +_kwq_mark_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t count, + uint32_t lseq, uint32_t sseq) +{ + kwq->kw_prepost.count = count; + kwq->kw_prepost.lseq = lseq; + kwq->kw_prepost.sseq = sseq; +} + +static inline void +_kwq_clear_interrupted_wakeup(ksyn_wait_queue_t kwq) +{ + kwq->kw_intr.type = KWQ_INTR_NONE; + kwq->kw_intr.count = 0; + kwq->kw_intr.seq = 0; + kwq->kw_intr.returnbits = 0; +} + +static inline void +_kwq_mark_interruped_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type, + uint32_t count, uint32_t lseq, uint32_t returnbits) +{ + kwq->kw_intr.count = count; + kwq->kw_intr.seq = lseq; + kwq->kw_intr.returnbits = returnbits; + kwq->kw_intr.type = type; +} + +static void +_kwq_destroy(ksyn_wait_queue_t kwq) +{ + if (kwq->kw_owner) { + thread_deallocate(kwq->kw_owner); + } + lck_spin_destroy(&kwq->kw_lock, pthread_lck_grp); + zfree(kwq_zone, kwq); +} + +#define KWQ_SET_OWNER_TRANSFER_REF 0x1 + +static inline thread_t +_kwq_set_owner(ksyn_wait_queue_t kwq, thread_t new_owner, int flags) +{ + thread_t old_owner = kwq->kw_owner; + if (old_owner == new_owner) { + if (flags & KWQ_SET_OWNER_TRANSFER_REF) return new_owner; + return THREAD_NULL; + } + if ((flags & KWQ_SET_OWNER_TRANSFER_REF) == 0) { + thread_reference(new_owner); + } + kwq->kw_owner = new_owner; + return old_owner; +} + +static inline thread_t +_kwq_clear_owner(ksyn_wait_queue_t kwq) +{ + return _kwq_set_owner(kwq, THREAD_NULL, KWQ_SET_OWNER_TRANSFER_REF); +} + +static inline void +_kwq_cleanup_old_owner(thread_t *thread) +{ + if (*thread) { + thread_deallocate(*thread); + *thread = THREAD_NULL; + } +} + +static void +CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq) +{ + if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) { + if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) { + panic("CV:entries in queue durinmg reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount); + } + }; + if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) { + kwq->kw_nextseqword = PTHRW_RWS_INIT; + kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD; + }; + _kwq_clear_preposted_wakeup(kwq); + kwq->kw_lastunlockseq = PTHRW_RWL_INIT; + kwq->kw_lastseqword = PTHRW_RWS_INIT; + _kwq_clear_interrupted_wakeup(kwq); + kwq->kw_lword = 0; + kwq->kw_uword = 0; + kwq->kw_sword = PTHRW_RWS_INIT; +} + +static bool +_kwq_handle_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t type, + uint32_t lseq, uint32_t *retval) +{ + if (kwq->kw_prepost.count == 0 || + !is_seqlower_eq(lseq, kwq->kw_prepost.lseq)) { + return false; + } + 
+ kwq->kw_prepost.count--; + if (kwq->kw_prepost.count > 0) { + return false; + } + + int error, should_block = 0; + uint32_t updatebits = 0; + uint32_t pp_lseq = kwq->kw_prepost.lseq; + uint32_t pp_sseq = kwq->kw_prepost.sseq; + _kwq_clear_preposted_wakeup(kwq); + + kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED; + + error = kwq_handle_unlock(kwq, pp_lseq, pp_sseq, &updatebits, + (type | KW_UNLOCK_PREPOST), &should_block, lseq); + if (error) { + panic("_kwq_handle_preposted_wakeup: kwq_handle_unlock failed %d", + error); + } + + if (should_block) { + return false; + } + *retval = updatebits; + return true; +} + +static bool +_kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t type, uint32_t lgenval, + uint32_t rw_wc, uint32_t *retval) +{ + int res = 0; + + // overlaps only occur on read lockers + if (type != PTH_RW_TYPE_READ) { + return false; + } + + // check for overlap and no pending W bit (indicates writers) + if ((kwq->kw_kflags & KSYN_KWF_OVERLAP_GUARD) && + !is_rws_savemask_set(rw_wc) && !is_rwl_wbit_set(lgenval)) { + /* overlap is set, so no need to check for valid state for overlap */ + + if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) { + /* increase the next expected seq by one */ + kwq->kw_nextseqword += PTHRW_INC; + /* set count by one & bits from the nextseq and add M bit */ + *retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT); + res = 1; + } + } + return res; +} + +static inline bool +_kwq_is_used(ksyn_wait_queue_t kwq) +{ + return (kwq->kw_inqueue != 0 || kwq->kw_prepost.count != 0 || + kwq->kw_intr.count != 0); +} + +/* + * consumes a pending interrupted waiter, returns true if the current + * thread should return back to userspace because it was previously + * interrupted. 
+ */ +static inline bool +_kwq_handle_interrupted_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type, + uint32_t lseq, uint32_t *retval) +{ + if (kwq->kw_intr.count != 0 && kwq->kw_intr.type == type && + (!kwq->kw_intr.seq || is_seqlower_eq(lseq, kwq->kw_intr.seq))) { + kwq->kw_intr.count--; + *retval = kwq->kw_intr.returnbits; + if (kwq->kw_intr.returnbits == 0) { + _kwq_clear_interrupted_wakeup(kwq); + } + return true; + } + return false; +} + static void pthread_list_lock(void) { @@ -350,98 +521,117 @@ pthread_list_unlock(void) static void ksyn_wqlock(ksyn_wait_queue_t kwq) { - - lck_mtx_lock(&kwq->kw_lock); + lck_spin_lock(&kwq->kw_lock); } static void ksyn_wqunlock(ksyn_wait_queue_t kwq) { - lck_mtx_unlock(&kwq->kw_lock); + lck_spin_unlock(&kwq->kw_lock); } - /* routine to drop the mutex unlocks , used both for mutexunlock system call and drop during cond wait */ static uint32_t -_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, int flags) +_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, + int flags) { kern_return_t ret; uint32_t returnbits = 0; - int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT; + uint32_t updatebits = 0; + int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) == + _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; uint32_t nextgen = (ugen + PTHRW_INC); + thread_t old_owner = THREAD_NULL; ksyn_wqlock(kwq); kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK); - uint32_t updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT); redrive: + updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | + (PTH_RWL_EBIT | PTH_RWL_KBIT); + if (firstfit) { if (kwq->kw_inqueue == 0) { - // not set or the new lock sequence is higher - if (kwq->kw_pre_rwwc == 0 || is_seqhigher(mgen, kwq->kw_pre_lockseq)) { - kwq->kw_pre_lockseq = (mgen & PTHRW_COUNT_MASK); - } - kwq->kw_pre_rwwc = 1; - ksyn_mtx_drop_qos_override(kwq); - kwq->kw_owner = 0; - // indicate prepost content in kernel - returnbits = mgen | PTH_RWL_PBIT; + uint32_t count = kwq->kw_prepost.count + 1; + // Increment the number of preposters we have waiting + _kwq_mark_preposted_wakeup(kwq, count, mgen & PTHRW_COUNT_MASK, 0); + // We don't know the current owner as we've determined this mutex + // drop should have a preposted locker inbound into the kernel but + // we have no way of knowing who it is. When it arrives, the lock + // path will update the turnstile owner and return it to userspace. + old_owner = _kwq_clear_owner(kwq); + pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL, + &kwq->kw_turnstile); + PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr, + kwq->kw_prepost.lseq, count, 0); } else { // signal first waiter - ret = ksyn_mtxsignal(kwq, NULL, updatebits); + ret = ksyn_mtxsignal(kwq, NULL, updatebits, &old_owner); if (ret == KERN_NOT_WAITING) { + // ksyn_mtxsignal attempts to signal + // the thread but it sets up the turnstile inheritor first. + // That means we can't redrive the mutex in a loop without + // dropping the wq lock and cleaning up the turnstile state. + ksyn_wqunlock(kwq); + pthread_kern->psynch_wait_cleanup(); + _kwq_cleanup_old_owner(&old_owner); + ksyn_wqlock(kwq); goto redrive; } } } else { - int prepost = 0; + bool prepost = false; if (kwq->kw_inqueue == 0) { // No waiters in the queue. 
- prepost = 1; + prepost = true; } else { - uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum & PTHRW_COUNT_MASK); + uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum & PTHRW_COUNT_MASK); if (low_writer == nextgen) { /* next seq to be granted found */ /* since the grant could be cv, make sure mutex wait is set incase the thread interrupted out */ - ret = ksyn_mtxsignal(kwq, NULL, updatebits | PTH_RWL_MTX_WAIT); + ret = ksyn_mtxsignal(kwq, NULL, + updatebits | PTH_RWL_MTX_WAIT, &old_owner); if (ret == KERN_NOT_WAITING) { /* interrupt post */ - kwq->kw_pre_intrcount = 1; - kwq->kw_pre_intrseq = nextgen; - kwq->kw_pre_intrretbits = updatebits; - kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE; + _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1, + nextgen, updatebits); } - } else if (is_seqhigher(low_writer, nextgen)) { - prepost = 1; + prepost = true; } else { //__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one is queue\n"); ksyn_waitq_element_t kwe; - kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen); + kwe = ksyn_queue_find_seq(kwq, + &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], nextgen); if (kwe != NULL) { /* next seq to be granted found */ /* since the grant could be cv, make sure mutex wait is set incase the thread interrupted out */ - ret = ksyn_mtxsignal(kwq, kwe, updatebits | PTH_RWL_MTX_WAIT); + ret = ksyn_mtxsignal(kwq, kwe, + updatebits | PTH_RWL_MTX_WAIT, &old_owner); if (ret == KERN_NOT_WAITING) { goto redrive; } } else { - prepost = 1; + prepost = true; } } } if (prepost) { - ksyn_mtx_drop_qos_override(kwq); - kwq->kw_owner = 0; - if (++kwq->kw_pre_rwwc > 1) { + if (kwq->kw_prepost.count != 0) { __FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n"); } else { - kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK); + _kwq_mark_preposted_wakeup(kwq, 1, nextgen & PTHRW_COUNT_MASK, + 0); } + old_owner = _kwq_clear_owner(kwq); + pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL, + &kwq->kw_turnstile); } } - + ksyn_wqunlock(kwq); + pthread_kern->psynch_wait_cleanup(); + _kwq_cleanup_old_owner(&old_owner); ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP); return returnbits; } @@ -460,354 +650,216 @@ _ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval) return res; } -static int -_ksyn_handle_missed_wakeups(ksyn_wait_queue_t kwq, - uint32_t type, - uint32_t lockseq, - uint32_t *retval) -{ - int res = 0; - if (kwq->kw_pre_intrcount != 0 && - kwq->kw_pre_intrtype == type && - (kwq->kw_pre_intrseq == 0 || is_seqlower_eq(lockseq, kwq->kw_pre_intrseq))) { - kwq->kw_pre_intrcount--; - *retval = kwq->kw_pre_intrretbits; - if (kwq->kw_pre_intrcount == 0) { - CLEAR_INTR_PREPOST_BITS(kwq); - } - res = 1; - } - return res; -} - -static int -_ksyn_handle_overlap(ksyn_wait_queue_t kwq, - uint32_t lgenval, - uint32_t rw_wc, - uint32_t *retval) +/* + * psynch_mutexwait: This system call is used for contended psynch mutexes to + * block. 
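Before the wait path, a condensed sketch of the decision the fair-share drop code above makes on the lowest queued writer versus the sequence being granted (the helper stands in for the kernel's wrap-aware is_seqhigher()):

#include <stdbool.h>
#include <stdint.h>

enum drop_action { DROP_PREPOST, DROP_SIGNAL_LOWEST, DROP_SIGNAL_EXACT };

/* stand-in for is_seqhigher(); the kernel's sequence compares are wrap-aware too */
static bool
seq_higher(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

static enum drop_action
fairshare_drop_action(bool queue_empty, uint32_t low_writer, uint32_t nextgen)
{
	if (queue_empty) {
		return DROP_PREPOST;          /* remember the grant for the inbound waiter */
	}
	if (low_writer == nextgen) {
		return DROP_SIGNAL_LOWEST;    /* next in line is already waiting */
	}
	if (seq_higher(low_writer, nextgen)) {
		return DROP_PREPOST;          /* grantee has not reached the kernel yet */
	}
	return DROP_SIGNAL_EXACT;             /* search the queue for the exact sequence */
}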
+ */ +int +_psynch_mutexwait(__unused proc_t p, user_addr_t mutex, uint32_t mgen, + uint32_t ugen, uint64_t tid, uint32_t flags, uint32_t *retval) { - int res = 0; - - // check for overlap and no pending W bit (indicates writers) - if (kwq->kw_overlapwatch != 0 && - (rw_wc & PTHRW_RWS_SAVEMASK) == 0 && - (lgenval & PTH_RWL_WBIT) == 0) { - /* overlap is set, so no need to check for valid state for overlap */ + ksyn_wait_queue_t kwq; + int error = 0; + int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) + == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; + int ins_flags = SEQFIT; + uint32_t lseq = (mgen & PTHRW_COUNT_MASK); + uint32_t updatebits = 0; + thread_t tid_th = THREAD_NULL, old_owner = THREAD_NULL; - if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) { - /* increase the next expected seq by one */ - kwq->kw_nextseqword += PTHRW_INC; - /* set count by one & bits from the nextseq and add M bit */ - *retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT); - res = 1; - } + if (firstfit) { + /* first fit */ + ins_flags = FIRSTFIT; } - return res; -} -static int -_ksyn_handle_prepost(ksyn_wait_queue_t kwq, - uint32_t type, - uint32_t lockseq, - uint32_t *retval) -{ - int res = 0; - if (kwq->kw_pre_rwwc != 0 && is_seqlower_eq(lockseq, kwq->kw_pre_lockseq)) { - kwq->kw_pre_rwwc--; - if (kwq->kw_pre_rwwc == 0) { - uint32_t preseq = kwq->kw_pre_lockseq; - uint32_t prerw_wc = kwq->kw_pre_sseq; - CLEAR_PREPOST_BITS(kwq); - if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){ - kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED; - } + error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, + (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX), &kwq); + if (error != 0) { + return error; + } - int error, block; - uint32_t updatebits; - error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (type|KW_UNLOCK_PREPOST), &block, lockseq); - if (error != 0) { - panic("kwq_handle_unlock failed %d\n", error); - } +again: + ksyn_wqlock(kwq); - if (block == 0) { - *retval = updatebits; - res = 1; - } - } + if (_kwq_handle_interrupted_wakeup(kwq, KWQ_INTR_WRITE, lseq, retval)) { + old_owner = _kwq_set_owner(kwq, current_thread(), 0); + pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner, + &kwq->kw_turnstile); + ksyn_wqunlock(kwq); + _kwq_cleanup_old_owner(&old_owner); + goto out; } - return res; -} -/* Helpers for QoS override management. Only applies to mutexes */ -static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t kwq, uint64_t tid, boolean_t prepost) -{ - if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) { - boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE; - int waiter_qos = pthread_kern->proc_usynch_get_requested_thread_qos(current_uthread()); - - kwq->kw_qos_override = MAX(waiter_qos, kwq->kw_qos_override); - - if (prepost && kwq->kw_inqueue == 0) { - // if there are no more waiters in the queue after the new (prepost-receiving) owner, we do not set an - // override, because the receiving owner may not re-enter the kernel to signal someone else if it is - // the last one to unlock. 
If other waiters end up entering the kernel, they will boost the owner - tid = 0; - } - - if (tid != 0) { - if ((tid == kwq->kw_owner) && (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED)) { - // hint continues to be accurate, and a boost was already applied - pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, FALSE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); - } else { - // either hint did not match previous owner, or hint was accurate but mutex was not contended enough for a boost previously - boolean_t boostsucceded; - - boostsucceded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); - - if (boostsucceded) { - kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED; - } - - if (wasboosted && (tid != kwq->kw_owner) && (kwq->kw_owner != 0)) { - // the hint did not match the previous owner, so drop overrides - PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0); - pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); - } - } - } else { - // new hint tells us that we don't know the owner, so drop any existing overrides - kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED; - kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED; - - if (wasboosted && (kwq->kw_owner != 0)) { - // the hint did not match the previous owner, so drop overrides - PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0); - pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); + if (kwq->kw_prepost.count && (firstfit || (lseq == kwq->kw_prepost.lseq))) { + /* got preposted lock */ + kwq->kw_prepost.count--; + + if (!firstfit) { + if (kwq->kw_prepost.count > 0) { + __FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n"); + kwq->kw_prepost.lseq += PTHRW_INC; /* look for next one */ + ksyn_wqunlock(kwq); + error = EINVAL; + goto out; } + _kwq_clear_preposted_wakeup(kwq); } - } -} -static boolean_t -ksyn_mtx_transfer_qos_override_begin(ksyn_wait_queue_t kwq, - ksyn_waitq_element_t kwe, uint64_t *kw_owner) -{ - boolean_t needs_commit = FALSE; - if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) { - boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE; - - if (kwq->kw_inqueue > 1) { - boolean_t boostsucceeded; - - // More than one waiter, so resource will still be contended after handing off ownership - boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), kwe->kwe_uth, 0, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); - - if (boostsucceeded) { - kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED; - } + if (kwq->kw_inqueue == 0) { + updatebits = lseq | (PTH_RWL_KBIT | PTH_RWL_EBIT); } else { - // kw_inqueue == 1 to get to this point, which means there will be no contention after this point - kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED; - kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED; - } - - // Remove the override that was applied to kw_owner. 
There may have been a race, - // in which case it may not match the current thread - if (wasboosted) { - if (kwq->kw_owner == 0) { - PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0); - } else if (thread_tid(current_thread()) != kwq->kw_owner) { - PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0); - *kw_owner = kwq->kw_owner; - needs_commit = TRUE; - } else { - *kw_owner = 0; - needs_commit = TRUE; - } + updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | + (PTH_RWL_KBIT | PTH_RWL_EBIT); } - } - return needs_commit; -} - -static void -ksyn_mtx_transfer_qos_override_commit(ksyn_wait_queue_t kwq, uint64_t kw_owner) -{ - struct uthread *uthread = kw_owner ? NULL : current_uthread(); - - pthread_kern->proc_usynch_thread_qos_remove_override_for_resource( - current_task(), uthread, kw_owner, kwq->kw_addr, - THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); -} + updatebits &= ~PTH_RWL_MTX_WAIT; -static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t kwq) -{ - if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) { - boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE; - - // assume nobody else in queue if this routine was called - kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED; - kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED; - - // Remove the override that was applied to kw_owner. There may have been a race, - // in which case it may not match the current thread - if (wasboosted) { - if (kwq->kw_owner == 0) { - PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0); - } else if (thread_tid(current_thread()) != kwq->kw_owner) { - PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0); - pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); - } else { - pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), current_uthread(), 0, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); - } + if (updatebits == 0) { + __FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT \n"); } - } -} -/* - * psynch_mutexwait: This system call is used for contended psynch mutexes to block. - */ + PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr, + kwq->kw_prepost.lseq, kwq->kw_prepost.count, 1); -int -_psynch_mutexwait(__unused proc_t p, - user_addr_t mutex, - uint32_t mgen, - uint32_t ugen, - uint64_t tid, - uint32_t flags, - uint32_t *retval) -{ - ksyn_wait_queue_t kwq; - int error=0; - int ins_flags; - - int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT; - uint32_t updatebits = 0; - - uint32_t lockseq = (mgen & PTHRW_COUNT_MASK); - - if (firstfit == 0) { - ins_flags = SEQFIT; - } else { - /* first fit */ - ins_flags = FIRSTFIT; - } - - error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX), &kwq); - if (error != 0) { - return(error); + old_owner = _kwq_set_owner(kwq, current_thread(), 0); + pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner, + &kwq->kw_turnstile); + + ksyn_wqunlock(kwq); + _kwq_cleanup_old_owner(&old_owner); + *retval = updatebits; + goto out; } - - ksyn_wqlock(kwq); - // mutexwait passes in an owner hint at the time userspace contended for the mutex, however, the - // owner tid in the userspace data structure may be unset or SWITCHING (-1), or it may correspond - // to a stale snapshot after the lock has subsequently been unlocked by another thread. 
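The owner-hint handling here, in both the removed and the rewritten form, amounts to trusting the hint only when nothing contradicts it. A condensed sketch of the cases the comments enumerate; the helper and the SWITCHING encoding are assumptions for illustration:

#include <stdbool.h>
#include <stdint.h>

#define TID_SWITCHING ((uint64_t)-1)   /* assumed encoding of the SWITCHING (-1) hint */

enum hint_action { HINT_KEEP_KERNEL_OWNER, HINT_RESOLVE_TID };

static enum hint_action
classify_owner_hint(uint64_t tid, uint64_t kernel_owner_tid, bool unlock_seq_is_newer)
{
	if (tid == kernel_owner_tid) {     /* userspace and kernel agree */
		return HINT_KEEP_KERNEL_OWNER;
	}
	if (tid == 0) {                    /* contender raced the owner writing its TID */
		return HINT_KEEP_KERNEL_OWNER;
	}
	if (unlock_seq_is_newer) {         /* an unlock already passed this snapshot: stale */
		return HINT_KEEP_KERNEL_OWNER;
	}
	if (tid == TID_SWITCHING) {        /* the unlocker has not reached the kernel yet */
		return HINT_KEEP_KERNEL_OWNER;
	}
	return HINT_RESOLVE_TID;           /* no reason to distrust it: look the thread up */
}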
- if (tid == 0) { + // mutexwait passes in an owner hint at the time userspace contended for + // the mutex, however, the owner tid in the userspace data structure may be + // unset or SWITCHING (-1), or it may correspond to a stale snapshot after + // the lock has subsequently been unlocked by another thread. + if (tid == thread_tid(kwq->kw_owner)) { + // userspace and kernel agree + } else if (tid == 0) { // contender came in before owner could write TID - tid = 0; - } else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && is_seqlower(ugen, kwq->kw_lastunlockseq)) { - // owner is stale, someone has come in and unlocked since this contended read the TID, so - // assume what is known in the kernel is accurate - tid = kwq->kw_owner; + // let's assume that what the kernel knows is accurate + // for all we know this waiter came in late in the kernel + } else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && + is_seqlower(ugen, kwq->kw_lastunlockseq)) { + // owner is stale, someone has come in and unlocked since this + // contended read the TID, so assume what is known in the kernel is + // accurate } else if (tid == PTHREAD_MTX_TID_SWITCHING) { - // userspace didn't know the owner because it was being unlocked, but that unlocker hasn't - // reached the kernel yet. So assume what is known in the kernel is accurate - tid = kwq->kw_owner; + // userspace didn't know the owner because it was being unlocked, but + // that unlocker hasn't reached the kernel yet. So assume what is known + // in the kernel is accurate } else { - // hint is being passed in for a specific thread, and we have no reason not to trust - // it (like the kernel unlock sequence being higher - } - - - if (_ksyn_handle_missed_wakeups(kwq, PTH_RW_TYPE_WRITE, lockseq, retval)) { - ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE); - kwq->kw_owner = thread_tid(current_thread()); - - ksyn_wqunlock(kwq); - goto out; - } - - if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK) ))) { - /* got preposted lock */ - kwq->kw_pre_rwwc--; - if (kwq->kw_pre_rwwc == 0) { - CLEAR_PREPOST_BITS(kwq); - if (kwq->kw_inqueue == 0) { - updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT); - } else { - updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT); - } - updatebits &= ~PTH_RWL_MTX_WAIT; - - if (updatebits == 0) { - __FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT \n"); - } - - ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE); - kwq->kw_owner = thread_tid(current_thread()); - - ksyn_wqunlock(kwq); - *retval = updatebits; - goto out; - } else { - __FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n"); - kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */ + // hint is being passed in for a specific thread, and we have no reason + // not to trust it (like the kernel unlock sequence being higher) + // + // So resolve the hint to a thread_t if we haven't done so yet + // and redrive as we dropped the lock + if (tid_th == THREAD_NULL) { ksyn_wqunlock(kwq); - error = EINVAL; - goto out; + tid_th = pthread_kern->task_findtid(current_task(), tid); + if (tid_th == THREAD_NULL) tid = 0; + goto again; } + tid_th = _kwq_set_owner(kwq, tid_th, KWQ_SET_OWNER_TRANSFER_REF); } - - ksyn_mtx_update_owner_qos_override(kwq, tid, FALSE); - kwq->kw_owner = tid; - error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, mgen, ins_flags, 0, psynch_mtxcontinue, 
kThreadWaitPThreadMutex); + if (tid_th) { + // We are on our way to block, and can't drop the spinlock anymore + pthread_kern->thread_deallocate_safe(tid_th); + tid_th = THREAD_NULL; + } + error = ksyn_wait(kwq, KSYN_QUEUE_WRITE, mgen, ins_flags, 0, 0, + psynch_mtxcontinue, kThreadWaitPThreadMutex); // ksyn_wait drops wait queue lock out: - ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX)); + pthread_kern->psynch_wait_cleanup(); + ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX)); + if (tid_th) { + thread_deallocate(tid_th); + } return error; } -void +void __dead2 psynch_mtxcontinue(void *parameter, wait_result_t result) { uthread_t uth = current_uthread(); ksyn_wait_queue_t kwq = parameter; ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); - + + ksyn_wqlock(kwq); + int error = _wait_result_to_errno(result); if (error != 0) { - ksyn_wqlock(kwq); if (kwe->kwe_kwqqueue) { - ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe); + ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe); } - ksyn_wqunlock(kwq); } else { uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT; pthread_kern->uthread_set_returnval(uth, updatebits); - - if (updatebits == 0) + + if (updatebits == 0) { __FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT \n"); + } } - ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX)); + + pthread_kern->psynch_wait_complete(kwq, &kwq->kw_turnstile); + + ksyn_wqunlock(kwq); + pthread_kern->psynch_wait_cleanup(); + ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX)); pthread_kern->unix_syscall_return(error); + __builtin_unreachable(); +} + +static void __dead2 +_psynch_rw_continue(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, + wait_result_t result) +{ + uthread_t uth = current_uthread(); + ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); + + ksyn_wqlock(kwq); + + int error = _wait_result_to_errno(result); + if (error != 0) { + if (kwe->kwe_kwqqueue) { + ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe); + } + } else { + pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval); + } + + ksyn_wqunlock(kwq); + ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK)); + + pthread_kern->unix_syscall_return(error); + __builtin_unreachable(); +} + +void __dead2 +psynch_rw_rdcontinue(void *parameter, wait_result_t result) +{ + _psynch_rw_continue(parameter, KSYN_QUEUE_READ, result); +} + +void __dead2 +psynch_rw_wrcontinue(void *parameter, wait_result_t result) +{ + _psynch_rw_continue(parameter, KSYN_QUEUE_WRITE, result); } /* * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes. 
*/ int -_psynch_mutexdrop(__unused proc_t p, - user_addr_t mutex, - uint32_t mgen, - uint32_t ugen, - uint64_t tid __unused, - uint32_t flags, - uint32_t *retval) +_psynch_mutexdrop(__unused proc_t p, user_addr_t mutex, uint32_t mgen, + uint32_t ugen, uint64_t tid __unused, uint32_t flags, uint32_t *retval) { int res; ksyn_wait_queue_t kwq; - + res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq); if (res == 0) { uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags); @@ -821,65 +873,57 @@ _psynch_mutexdrop(__unused proc_t p, } static kern_return_t -ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t updateval) +ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, + uint32_t updateval, thread_t *old_owner) { kern_return_t ret; - boolean_t needs_commit; - uint64_t kw_owner; if (!kwe) { - kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_kwelist); + kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_kwelist); if (!kwe) { panic("ksyn_mtxsignal: panic signaling empty queue"); } } - needs_commit = ksyn_mtx_transfer_qos_override_begin(kwq, kwe, &kw_owner); - kwq->kw_owner = kwe->kwe_tid; - - ret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, kwe, updateval); + PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_START, kwq->kw_addr, kwe, + thread_tid(kwe->kwe_thread), kwq->kw_inqueue); - // if waking the new owner failed, remove any overrides - if (ret != KERN_SUCCESS) { - ksyn_mtx_drop_qos_override(kwq); - kwq->kw_owner = 0; - } else if (needs_commit) { - ksyn_mtx_transfer_qos_override_commit(kwq, kw_owner); + ret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, kwe, updateval); + if (ret == KERN_SUCCESS) { + *old_owner = _kwq_set_owner(kwq, kwe->kwe_thread, 0); + } else { + *old_owner = _kwq_clear_owner(kwq); } + PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_END, kwq->kw_addr, kwe, + ret, 0); return ret; } static void -ksyn_prepost(ksyn_wait_queue_t kwq, - ksyn_waitq_element_t kwe, - uint32_t state, +ksyn_prepost(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t state, uint32_t lockseq) { bzero(kwe, sizeof(*kwe)); kwe->kwe_state = state; kwe->kwe_lockseq = lockseq; kwe->kwe_count = 1; - - (void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITER, kwe, lockseq, SEQFIT); + + (void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITE, kwe, lockseq, SEQFIT); kwq->kw_fakecount++; } static void -ksyn_cvsignal(ksyn_wait_queue_t ckwq, - thread_t th, - uint32_t uptoseq, - uint32_t signalseq, - uint32_t *updatebits, - int *broadcast, - ksyn_waitq_element_t *nkwep) +ksyn_cvsignal(ksyn_wait_queue_t ckwq, thread_t th, uint32_t uptoseq, + uint32_t signalseq, uint32_t *updatebits, int *broadcast, + ksyn_waitq_element_t *nkwep) { ksyn_waitq_element_t kwe = NULL; ksyn_waitq_element_t nkwe = NULL; - ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER]; - + ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE]; + uptoseq &= PTHRW_COUNT_MASK; - + // Find the specified thread to wake. if (th != THREAD_NULL) { uthread_t uth = pthread_kern->get_bsdthread_info(th); @@ -893,7 +937,7 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq, return; } } - + // If no thread was specified, find any thread to wake (with the right // sequence number). while (th == THREAD_NULL) { @@ -906,13 +950,13 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq, // reacquiring the lock after allocation in // case anything new shows up. 
ksyn_wqunlock(ckwq); - nkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone); + nkwe = (ksyn_waitq_element_t)zalloc(kwe_zone); ksyn_wqlock(ckwq); } else { break; } } - + if (kwe != NULL) { // If we found a thread to wake... if (kwe->kwe_state == KWE_THREAD_INWAIT) { @@ -928,7 +972,7 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq, */ *broadcast = 1; } else { - (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT); + (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT); *updatebits += PTHRW_INC; } } else if (kwe->kwe_state == KWE_THREAD_PREPOST) { @@ -944,7 +988,7 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq, * If we allocated a new kwe above but then found a different kwe to * use then we need to deallocate the spare one. */ - pthread_kern->zfree(kwe_zone, nkwe); + zfree(kwe_zone, nkwe); nkwe = NULL; } } else if (nkwe != NULL) { @@ -954,19 +998,14 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq, } else { panic("failed to allocate kwe\n"); } - + *nkwep = nkwe; } static int -__psynch_cvsignal(user_addr_t cv, - uint32_t cgen, - uint32_t cugen, - uint32_t csgen, - uint32_t flags, - int broadcast, - mach_port_name_t threadport, - uint32_t *retval) +__psynch_cvsignal(user_addr_t cv, uint32_t cgen, uint32_t cugen, + uint32_t csgen, uint32_t flags, int broadcast, + mach_port_name_t threadport, uint32_t *retval) { int error = 0; thread_t th = THREAD_NULL; @@ -997,11 +1036,16 @@ __psynch_cvsignal(user_addr_t cv, // update L, U and S... UPDATE_CVKWQ(kwq, cgen, cugen, csgen); - + + PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_START, kwq->kw_addr, + fromseq, uptoseq, broadcast); + if (!broadcast) { // No need to signal if the CV is already balanced. if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) { - ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, &broadcast, &nkwe); + ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, + &broadcast, &nkwe); + PTHREAD_TRACE(psynch_cvar_signal, kwq->kw_addr, broadcast, 0,0); } } @@ -1013,11 +1057,16 @@ __psynch_cvsignal(user_addr_t cv, // set C or P bits and free if needed ksyn_cvupdate_fixup(kwq, &updatebits); *retval = updatebits; + + PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_END, kwq->kw_addr, + updatebits, 0, 0); ksyn_wqunlock(kwq); + + pthread_kern->psynch_wait_cleanup(); if (nkwe != NULL) { - pthread_kern->zfree(kwe_zone, nkwe); + zfree(kwe_zone, nkwe); } ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR)); @@ -1034,15 +1083,9 @@ __psynch_cvsignal(user_addr_t cv, * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars. */ int -_psynch_cvbroad(__unused proc_t p, - user_addr_t cv, - uint64_t cvlsgen, - uint64_t cvudgen, - uint32_t flags, - __unused user_addr_t mutex, - __unused uint64_t mugen, - __unused uint64_t tid, - uint32_t *retval) +_psynch_cvbroad(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen, + uint64_t cvudgen, uint32_t flags, __unused user_addr_t mutex, + __unused uint64_t mugen, __unused uint64_t tid, uint32_t *retval) { uint32_t diffgen = cvudgen & 0xffffffff; uint32_t count = diffgen >> PTHRW_COUNT_SHIFT; @@ -1062,15 +1105,9 @@ _psynch_cvbroad(__unused proc_t p, * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars. 
*/ int -_psynch_cvsignal(__unused proc_t p, - user_addr_t cv, - uint64_t cvlsgen, - uint32_t cvugen, - int threadport, - __unused user_addr_t mutex, - __unused uint64_t mugen, - __unused uint64_t tid, - uint32_t flags, +_psynch_cvsignal(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen, + uint32_t cvugen, int threadport, __unused user_addr_t mutex, + __unused uint64_t mugen, __unused uint64_t tid, uint32_t flags, uint32_t *retval) { uint32_t csgen = (cvlsgen >> 32) & 0xffffffff; @@ -1083,16 +1120,9 @@ _psynch_cvsignal(__unused proc_t p, * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel. */ int -_psynch_cvwait(__unused proc_t p, - user_addr_t cv, - uint64_t cvlsgen, - uint32_t cvugen, - user_addr_t mutex, - uint64_t mugen, - uint32_t flags, - int64_t sec, - uint32_t nsec, - uint32_t *retval) +_psynch_cvwait(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen, + uint32_t cvugen, user_addr_t mutex, uint64_t mugen, uint32_t flags, + int64_t sec, uint32_t nsec, uint32_t *retval) { int error = 0; uint32_t updatebits = 0; @@ -1118,6 +1148,8 @@ _psynch_cvwait(__unused proc_t p, __FAILEDUSERTEST__("psync_cvwait; invalid sequence numbers\n"); return EINVAL; } + + PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_START, cv, mutex, cgen, 0); error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq); if (error != 0) { @@ -1125,7 +1157,8 @@ _psynch_cvwait(__unused proc_t p, } if (mutex != 0) { - error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, NULL); + uint32_t mutexrv = 0; + error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, &mutexrv); if (error != 0) { goto out; } @@ -1137,7 +1170,7 @@ _psynch_cvwait(__unused proc_t p, UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen); /* Look for the sequence for prepost (or conflicting thread */ - ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER]; + ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE]; kwe = ksyn_queue_find_cvpreposeq(kq, lockseq); if (kwe != NULL) { if (kwe->kwe_state == KWE_THREAD_PREPOST) { @@ -1171,7 +1204,7 @@ _psynch_cvwait(__unused proc_t p, } if (error == 0) { - updatebits = PTHRW_INC; + updatebits |= PTHRW_INC; ckwq->kw_sword += PTHRW_INC; /* set C or P bits and free if needed */ @@ -1180,45 +1213,54 @@ _psynch_cvwait(__unused proc_t p, } } else { uint64_t abstime = 0; + uint16_t kwe_flags = 0; if (sec != 0 || (nsec & 0x3fffffff) != 0) { struct timespec ts; ts.tv_sec = (__darwin_time_t)sec; ts.tv_nsec = (nsec & 0x3fffffff); - nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime); + nanoseconds_to_absolutetime( + (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime); clock_absolutetime_interval_to_deadline(abstime, &abstime); } + + PTHREAD_TRACE(psynch_cvar_kwait, cv, mutex, kwe_flags, 1); - error = ksyn_wait(ckwq, KSYN_QUEUE_WRITER, cgen, SEQFIT, abstime, psynch_cvcontinue, kThreadWaitPThreadCondVar); + error = ksyn_wait(ckwq, KSYN_QUEUE_WRITE, cgen, SEQFIT, abstime, + kwe_flags, psynch_cvcontinue, kThreadWaitPThreadCondVar); // ksyn_wait drops wait queue lock } ksyn_wqunlock(ckwq); - + if (nkwe != NULL) { - pthread_kern->zfree(kwe_zone, nkwe); + zfree(kwe_zone, nkwe); } out: + + PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, cv, error, updatebits, 2); + ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR)); return error; } -void +void __dead2 psynch_cvcontinue(void *parameter, wait_result_t result) { uthread_t uth = current_uthread(); ksyn_wait_queue_t ckwq = parameter; ksyn_waitq_element_t kwe = 
pthread_kern->uthread_get_uukwe(uth); - + int error = _wait_result_to_errno(result); if (error != 0) { ksyn_wqlock(ckwq); /* just in case it got woken up as we were granting */ - pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval); + int retval = kwe->kwe_psynchretval; + pthread_kern->uthread_set_returnval(uth, retval); if (kwe->kwe_kwqqueue) { - ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe); + ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe); } if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) { /* the condition var granted. @@ -1231,46 +1273,48 @@ psynch_cvcontinue(void *parameter, wait_result_t result) /* set C and P bits, in the local error */ if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) { - error |= ECVCERORR; + PTHREAD_TRACE(psynch_cvar_zeroed, ckwq->kw_addr, + ckwq->kw_lword, ckwq->kw_sword, ckwq->kw_inqueue); + error |= ECVCLEARED; if (ckwq->kw_inqueue != 0) { - ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 1); + ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 1); } ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0; ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT; } else { /* everythig in the queue is a fake entry ? */ if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) { - error |= ECVPERORR; + error |= ECVPREPOST; } } } ksyn_wqunlock(ckwq); + + PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr, + error, 0, 3); } else { int val = 0; // PTH_RWL_MTX_WAIT is removed if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) { val = PTHRW_INC | PTH_RWS_CV_CBIT; } + PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr, + val, 0, 4); pthread_kern->uthread_set_returnval(uth, val); } ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR)); pthread_kern->unix_syscall_return(error); + __builtin_unreachable(); } /* * psynch_cvclrprepost: This system call clears pending prepost if present. */ int -_psynch_cvclrprepost(__unused proc_t p, - user_addr_t cv, - uint32_t cvgen, - uint32_t cvugen, - uint32_t cvsgen, - __unused uint32_t prepocnt, - uint32_t preposeq, - uint32_t flags, - int *retval) +_psynch_cvclrprepost(__unused proc_t p, user_addr_t cv, uint32_t cvgen, + uint32_t cvugen, uint32_t cvsgen, __unused uint32_t prepocnt, + uint32_t preposeq, uint32_t flags, int *retval) { int error = 0; int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX); @@ -1279,7 +1323,8 @@ _psynch_cvclrprepost(__unused proc_t p, *retval = 0; - error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype, &kwq); + error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 
0 : cvsgen, flags, wqtype, + &kwq); if (error != 0) { return error; } @@ -1287,16 +1332,19 @@ _psynch_cvclrprepost(__unused proc_t p, ksyn_wqlock(kwq); if (mutex) { - int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT; - if (firstfit && kwq->kw_pre_rwwc != 0) { - if (is_seqlower_eq(kwq->kw_pre_lockseq, cvgen)) { - // clear prepost - kwq->kw_pre_rwwc = 0; - kwq->kw_pre_lockseq = 0; + int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) + == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; + if (firstfit && kwq->kw_prepost.count) { + if (is_seqlower_eq(kwq->kw_prepost.lseq, cvgen)) { + PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr, + kwq->kw_prepost.lseq, 0, 2); + _kwq_clear_preposted_wakeup(kwq); } } } else { - ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITER, preposeq, 0); + PTHREAD_TRACE(psynch_cvar_clrprepost, kwq->kw_addr, wqtype, + preposeq, 0); + ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITE, preposeq, 0); } ksyn_wqunlock(kwq); @@ -1307,50 +1355,47 @@ _psynch_cvclrprepost(__unused proc_t p, /* ***************** pthread_rwlock ************************ */ static int -__psynch_rw_lock(int type, - user_addr_t rwlock, - uint32_t lgenval, - uint32_t ugenval, - uint32_t rw_wc, - int flags, - uint32_t *retval) +__psynch_rw_lock(int type, user_addr_t rwlock, uint32_t lgenval, + uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval) { - int prepost_type, kqi; + uint32_t lockseq = lgenval & PTHRW_COUNT_MASK; + ksyn_wait_queue_t kwq; + int error, prepost_type, kqi; + thread_continue_t tc; if (type == PTH_RW_TYPE_READ) { prepost_type = KW_UNLOCK_PREPOST_READLOCK; kqi = KSYN_QUEUE_READ; + tc = psynch_rw_rdcontinue; } else { prepost_type = KW_UNLOCK_PREPOST_WRLOCK; - kqi = KSYN_QUEUE_WRITER; + kqi = KSYN_QUEUE_WRITE; + tc = psynch_rw_wrcontinue; } - uint32_t lockseq = lgenval & PTHRW_COUNT_MASK; + error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, + (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq); + if (error != 0) { + return error; + } - int error; - ksyn_wait_queue_t kwq; - error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq); - if (error == 0) { - ksyn_wqlock(kwq); - _ksyn_check_init(kwq, lgenval); - if (_ksyn_handle_missed_wakeups(kwq, type, lockseq, retval) || - // handle overlap first as they are not counted against pre_rwwc - (type == PTH_RW_TYPE_READ && _ksyn_handle_overlap(kwq, lgenval, rw_wc, retval)) || - _ksyn_handle_prepost(kwq, prepost_type, lockseq, retval)) { - ksyn_wqunlock(kwq); - } else { - block_hint_t block_hint = type == PTH_RW_TYPE_READ ? - kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite; - error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, THREAD_CONTINUE_NULL, block_hint); - // ksyn_wait drops wait queue lock - if (error == 0) { - uthread_t uth = current_uthread(); - ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); - *retval = kwe->kwe_psynchretval; - } - } - ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK)); + ksyn_wqlock(kwq); + _ksyn_check_init(kwq, lgenval); + if (_kwq_handle_interrupted_wakeup(kwq, type, lockseq, retval) || + // handle overlap first as they are not counted against pre_rwwc + // handle_overlap uses the flags in lgenval (vs. lockseq) + _kwq_handle_overlap(kwq, type, lgenval, rw_wc, retval) || + _kwq_handle_preposted_wakeup(kwq, prepost_type, lockseq, retval)) { + ksyn_wqunlock(kwq); + goto out; } + + block_hint_t block_hint = type == PTH_RW_TYPE_READ ? 
+ kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite; + error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, 0, tc, block_hint); + // ksyn_wait drops wait queue lock +out: + ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK)); return error; } @@ -1358,28 +1403,20 @@ __psynch_rw_lock(int type, * psynch_rw_rdlock: This system call is used for psync rwlock readers to block. */ int -_psynch_rw_rdlock(__unused proc_t p, - user_addr_t rwlock, - uint32_t lgenval, - uint32_t ugenval, - uint32_t rw_wc, - int flags, - uint32_t *retval) +_psynch_rw_rdlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval, + uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval) { - return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, flags, retval); + return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, + flags, retval); } /* * psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block. */ int -_psynch_rw_longrdlock(__unused proc_t p, - __unused user_addr_t rwlock, - __unused uint32_t lgenval, - __unused uint32_t ugenval, - __unused uint32_t rw_wc, - __unused int flags, - __unused uint32_t *retval) +_psynch_rw_longrdlock(__unused proc_t p, __unused user_addr_t rwlock, + __unused uint32_t lgenval, __unused uint32_t ugenval, + __unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval) { return ESRCH; } @@ -1389,28 +1426,20 @@ _psynch_rw_longrdlock(__unused proc_t p, * psynch_rw_wrlock: This system call is used for psync rwlock writers to block. */ int -_psynch_rw_wrlock(__unused proc_t p, - user_addr_t rwlock, - uint32_t lgenval, - uint32_t ugenval, - uint32_t rw_wc, - int flags, - uint32_t *retval) +_psynch_rw_wrlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval, + uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval) { - return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, rw_wc, flags, retval); + return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, + rw_wc, flags, retval); } /* * psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block. */ int -_psynch_rw_yieldwrlock(__unused proc_t p, - __unused user_addr_t rwlock, - __unused uint32_t lgenval, - __unused uint32_t ugenval, - __unused uint32_t rw_wc, - __unused int flags, - __unused uint32_t *retval) +_psynch_rw_yieldwrlock(__unused proc_t p, __unused user_addr_t rwlock, + __unused uint32_t lgenval, __unused uint32_t ugenval, + __unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval) { return ESRCH; } @@ -1420,13 +1449,8 @@ _psynch_rw_yieldwrlock(__unused proc_t p, * reader/writer variety lock. 
*/ int -_psynch_rw_unlock(__unused proc_t p, - user_addr_t rwlock, - uint32_t lgenval, - uint32_t ugenval, - uint32_t rw_wc, - int flags, - uint32_t *retval) +_psynch_rw_unlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval, + uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval) { int error = 0; ksyn_wait_queue_t kwq; @@ -1436,7 +1460,8 @@ _psynch_rw_unlock(__unused proc_t p, uint32_t curgen = lgenval & PTHRW_COUNT_MASK; int clearedkflags = 0; - error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq); + error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, + (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq); if (error != 0) { return(error); } @@ -1445,7 +1470,8 @@ _psynch_rw_unlock(__unused proc_t p, int isinit = _ksyn_check_init(kwq, lgenval); /* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */ - if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugenval, kwq->kw_lastunlockseq)!= 0)) { + if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && + (is_seqlower(ugenval, kwq->kw_lastunlockseq)!= 0)) { error = 0; goto out; } @@ -1466,7 +1492,7 @@ _psynch_rw_unlock(__unused proc_t p, /* can handle unlock now */ - CLEAR_PREPOST_BITS(kwq); + _kwq_clear_preposted_wakeup(kwq); error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0); #if __TESTPANICS__ @@ -1479,26 +1505,25 @@ out: *retval = updatebits; } - // If any of the wakeups failed because they already - // returned to userspace because of a signal then we need to ensure that the - // reset state is not cleared when that thread returns. Otherwise, + // If any of the wakeups failed because they + // already returned to userspace because of a signal then we need to ensure + // that the reset state is not cleared when that thread returns. Otherwise, // _pthread_rwlock_lock will clear the interrupted state before it is read. 
- if (clearedkflags != 0 && kwq->kw_pre_intrcount > 0) { + if (clearedkflags != 0 && kwq->kw_intr.count > 0) { kwq->kw_kflags |= KSYN_KWF_INITCLEARED; } ksyn_wqunlock(kwq); + pthread_kern->psynch_wait_cleanup(); ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK)); return(error); prepost: /* update if the new seq is higher than prev prepost, or first set */ - if (is_rws_setseq(kwq->kw_pre_sseq) || - is_seqhigher_eq(rw_wc, kwq->kw_pre_sseq)) { - kwq->kw_pre_rwwc = (diff - count); - kwq->kw_pre_lockseq = curgen; - kwq->kw_pre_sseq = rw_wc; + if (is_rws_sbit_set(kwq->kw_prepost.sseq) || + is_seqhigher_eq(rw_wc, kwq->kw_prepost.sseq)) { + _kwq_mark_preposted_wakeup(kwq, diff - count, curgen, rw_wc); updatebits = lgenval; /* let this not do unlock handling */ } error = 0; @@ -1526,13 +1551,9 @@ _pth_proc_hashinit(proc_t p) static int -ksyn_wq_hash_lookup(user_addr_t uaddr, - proc_t p, - int flags, - ksyn_wait_queue_t *out_kwq, - struct pthhashhead **out_hashptr, - uint64_t *out_object, - uint64_t *out_offset) +ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, + ksyn_wait_queue_t *out_kwq, struct pthhashhead **out_hashptr, + uint64_t *out_object, uint64_t *out_offset) { int res = 0; ksyn_wait_queue_t kwq; @@ -1593,9 +1614,8 @@ _pth_proc_hashdelete(proc_t p) pthread_list_unlock(); /* release fake entries if present for cvars */ if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0)) - ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]); - lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp); - pthread_kern->zfree(kwq_zone, kwq); + ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE]); + _kwq_destroy(kwq); pthread_list_lock(); } } @@ -1611,14 +1631,49 @@ ksyn_freeallkwe(ksyn_queue_t kq) while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) { TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list); if (kwe->kwe_state != KWE_THREAD_INWAIT) { - pthread_kern->zfree(kwe_zone, kwe); + zfree(kwe_zone, kwe); } } } +static inline void +_kwq_report_inuse(ksyn_wait_queue_t kwq) +{ + if (kwq->kw_prepost.count != 0) { + __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [pre %d:0x%x:0x%x]", + (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_prepost.count, + kwq->kw_prepost.lseq, kwq->kw_prepost.sseq); + PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, + kwq->kw_type, 1, 0); + } + if (kwq->kw_intr.count != 0) { + __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [intr %d:0x%x:0x%x:0x%x]", + (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_intr.count, + kwq->kw_intr.type, kwq->kw_intr.seq, + kwq->kw_intr.returnbits); + PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, + kwq->kw_type, 2, 0); + } + if (kwq->kw_iocount) { + __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [ioc %d:%d]", + (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_iocount, + kwq->kw_dropcount); + PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, + kwq->kw_type, 3, 0); + } + if (kwq->kw_inqueue) { + __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [inq %d:%d]", + (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_inqueue, + kwq->kw_fakecount); + PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, kwq->kw_type, + 4, 0); + } +} + /* find kernel waitqueue, if not present create one. 
Grants a reference */ int -ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int flags, int wqtype, ksyn_wait_queue_t *kwqp) +ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, + int flags, int wqtype, ksyn_wait_queue_t *kwqp) { int res = 0; ksyn_wait_queue_t kwq = NULL; @@ -1636,7 +1691,8 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int while (res == 0) { pthread_list_lock(); - res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, &object, &offset); + res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, + &object, &offset); if (res != 0) { pthread_list_unlock(); break; @@ -1645,13 +1701,13 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int // Drop the lock to allocate a new kwq and retry. pthread_list_unlock(); - nkwq = (ksyn_wait_queue_t)pthread_kern->zalloc(kwq_zone); + nkwq = (ksyn_wait_queue_t)zalloc(kwq_zone); bzero(nkwq, sizeof(struct ksyn_wait_queue)); int i; for (i = 0; i < KSYN_QUEUE_MAX; i++) { ksyn_queue_init(&nkwq->kw_ksynqueues[i]); } - lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr); + lck_spin_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr); continue; } else if (kwq == NULL && nkwq != NULL) { // Still not found, add the new kwq to the hash. @@ -1671,21 +1727,23 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int kwq->kw_pflags &= ~KSYN_WQ_FLIST; } if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) { - if (kwq->kw_inqueue == 0 && kwq->kw_pre_rwwc == 0 && kwq->kw_pre_intrcount == 0) { + if (!_kwq_is_used(kwq)) { if (kwq->kw_iocount == 0) { kwq->kw_type = 0; // mark for reinitialization - } else if (kwq->kw_iocount == 1 && kwq->kw_dropcount == kwq->kw_iocount) { + } else if (kwq->kw_iocount == 1 && + kwq->kw_dropcount == kwq->kw_iocount) { /* if all users are unlockers then wait for it to finish */ kwq->kw_pflags |= KSYN_WQ_WAITING; // Drop the lock and wait for the kwq to be free. 
- (void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0); + (void)msleep(&kwq->kw_pflags, pthread_list_mlock, + PDROP, "ksyn_wqfind", 0); continue; } else { - __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n"); + _kwq_report_inuse(kwq); res = EINVAL; } } else { - __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n"); + _kwq_report_inuse(kwq); res = EINVAL; } } @@ -1700,9 +1758,13 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int kwq->kw_lword = mgen; kwq->kw_uword = ugen; kwq->kw_sword = sgen; - kwq->kw_owner = 0; + kwq->kw_owner = THREAD_NULL; kwq->kw_kflags = 0; kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED; + PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_START, uaddr, + kwq->kw_type, kwq, 0); + PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_END, uaddr, + mgen, ugen, sgen); } kwq->kw_iocount++; if (wqtype == KSYN_WQTYPE_MUTEXDROP) { @@ -1716,8 +1778,7 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int *kwqp = kwq; } if (nkwq) { - lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp); - pthread_kern->zfree(kwq_zone, nkwq); + _kwq_destroy(nkwq); } return res; } @@ -1740,7 +1801,16 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype) wakeup(&kwq->kw_pflags); } - if (kwq->kw_pre_rwwc == 0 && kwq->kw_inqueue == 0 && kwq->kw_pre_intrcount == 0) { + if (!_kwq_is_used(kwq)) { + if (kwq->kw_turnstile) { + panic("kw_turnstile still non-null upon release"); + } + + PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_START, + kwq->kw_addr, kwq->kw_type, qfreenow, 0); + PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_END, + kwq->kw_addr, kwq->kw_lword, kwq->kw_uword, kwq->kw_sword); + if (qfreenow == 0) { microuptime(&kwq->kw_ts); LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list); @@ -1762,8 +1832,7 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype) } pthread_list_unlock(); if (free_elem != NULL) { - lck_mtx_destroy(&free_elem->kw_lock, pthread_lck_grp); - pthread_kern->zfree(kwq_zone, free_elem); + _kwq_destroy(free_elem); } } @@ -1771,7 +1840,7 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype) void psynch_wq_cleanup(__unused void *param, __unused void * param1) { - ksyn_wait_queue_t kwq; + ksyn_wait_queue_t kwq, tmp; struct timeval t; int reschedule = 0; uint64_t deadline = 0; @@ -1783,7 +1852,7 @@ psynch_wq_cleanup(__unused void *param, __unused void * param1) microuptime(&t); LIST_FOREACH(kwq, &pth_free_list, kw_list) { - if (kwq->kw_iocount != 0 || kwq->kw_pre_rwwc != 0 || kwq->kw_inqueue != 0 || kwq->kw_pre_intrcount != 0) { + if (_kwq_is_used(kwq) || kwq->kw_iocount != 0) { // still in use continue; } @@ -1810,10 +1879,8 @@ psynch_wq_cleanup(__unused void *param, __unused void * param1) } pthread_list_unlock(); - while ((kwq = LIST_FIRST(&freelist)) != NULL) { - LIST_REMOVE(kwq, kw_list); - lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp); - pthread_kern->zfree(kwq_zone, kwq); + LIST_FOREACH_SAFE(kwq, &freelist, kw_list, tmp) { + _kwq_destroy(kwq); } } @@ -1833,25 +1900,25 @@ _wait_result_to_errno(wait_result_t result) } int -ksyn_wait(ksyn_wait_queue_t kwq, - int kqi, - uint32_t lockseq, - int fit, - uint64_t abstime, - thread_continue_t continuation, - block_hint_t block_hint) +ksyn_wait(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, uint32_t lockseq, + int fit, uint64_t abstime, uint16_t kwe_flags, + thread_continue_t continuation, block_hint_t block_hint) { - int res; - thread_t 
th = current_thread(); uthread_t uth = pthread_kern->get_bsdthread_info(th); + struct turnstile **tstore = NULL; + int res; + + assert(continuation != THREAD_CONTINUE_NULL); + ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); bzero(kwe, sizeof(*kwe)); kwe->kwe_count = 1; kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK; kwe->kwe_state = KWE_THREAD_INWAIT; kwe->kwe_uth = uth; - kwe->kwe_tid = thread_tid(th); + kwe->kwe_thread = th; + kwe->kwe_flags = kwe_flags; res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit); if (res != 0) { @@ -1859,43 +1926,39 @@ ksyn_wait(ksyn_wait_queue_t kwq, ksyn_wqunlock(kwq); return res; } - - thread_set_pending_block_hint(th, block_hint); - assert_wait_deadline_with_leeway(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, abstime, 0); + + PTHREAD_TRACE(psynch_mutex_kwqwait, kwq->kw_addr, kwq->kw_inqueue, + kwq->kw_prepost.count, kwq->kw_intr.count); + + if (_kwq_use_turnstile(kwq)) { + // pthread mutexes and rwlocks both (at least sometimes) know their + // owner and can use turnstiles. Otherwise, we pass NULL as the + // tstore to the shims so they wait on the global waitq. + tstore = &kwq->kw_turnstile; + } + + pthread_kern->psynch_wait_prepare((uintptr_t)kwq, tstore, kwq->kw_owner, + block_hint, abstime); + ksyn_wqunlock(kwq); - - kern_return_t ret; - if (continuation == THREAD_CONTINUE_NULL) { - ret = thread_block(NULL); - } else { - ret = thread_block_parameter(continuation, kwq); - - // If thread_block_parameter returns (interrupted) call the - // continuation manually to clean up. - continuation(kwq, ret); - - // NOT REACHED - panic("ksyn_wait continuation returned"); + + if (tstore) { + pthread_kern->psynch_wait_update_complete(kwq->kw_turnstile); } - res = _wait_result_to_errno(ret); - if (res != 0) { - ksyn_wqlock(kwq); - if (kwe->kwe_kwqqueue) { - ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe); - } - ksyn_wqunlock(kwq); - } - return res; + thread_block_parameter(continuation, kwq); + + // NOT REACHED + panic("ksyn_wait continuation returned"); + __builtin_unreachable(); } kern_return_t -ksyn_signal(ksyn_wait_queue_t kwq, - int kqi, - ksyn_waitq_element_t kwe, - uint32_t updateval) +ksyn_signal(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, + ksyn_waitq_element_t kwe, uint32_t updateval) { kern_return_t ret; + struct turnstile **tstore = NULL; // If no wait element was specified, wake the first. 
if (!kwe) { @@ -1912,7 +1975,12 @@ ksyn_signal(ksyn_wait_queue_t kwq, ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe); kwe->kwe_psynchretval = updateval; - ret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval); + if (_kwq_use_turnstile(kwq)) { + tstore = &kwq->kw_turnstile; + } + + ret = pthread_kern->psynch_wait_wakeup(kwq, kwe, tstore); + if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) { panic("ksyn_signal: panic waking up thread %x\n", ret); } @@ -1925,7 +1993,8 @@ ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp) kern_return_t ret; vm_page_info_basic_data_t info; mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT; - ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count); + ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, + VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count); if (ret != KERN_SUCCESS) { return EINVAL; } @@ -1943,20 +2012,22 @@ ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp) /* lowest of kw_fr, kw_flr, kw_fwr, kw_fywr */ int -kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *typep, uint32_t lowest[]) +kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, + int *typep, uint32_t lowest[]) { uint32_t kw_fr, kw_fwr, low; int type = 0, lowtype, typenum[2] = { 0 }; uint32_t numbers[2] = { 0 }; int count = 0, i; - - if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) { + if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || + ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) { type |= PTH_RWSHFT_TYPE_READ; /* read entries are present */ if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) { kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum; - if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0)) + if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && + (is_seqlower(premgen, kw_fr) != 0)) kw_fr = premgen; } else kw_fr = premgen; @@ -1968,22 +2039,24 @@ kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type } else lowest[KSYN_QUEUE_READ] = 0; - if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) { + if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) || + ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) { type |= PTH_RWSHFT_TYPE_WRITE; /* read entries are present */ - if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) { - kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum; - if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0)) + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) { + kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum; + if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && + (is_seqlower(premgen, kw_fwr) != 0)) kw_fwr = premgen; } else kw_fwr = premgen; - lowest[KSYN_QUEUE_WRITER] = kw_fwr; + lowest[KSYN_QUEUE_WRITE] = kw_fwr; numbers[count]= kw_fwr; typenum[count] = PTH_RW_TYPE_WRITE; count++; } else - lowest[KSYN_QUEUE_WRITER] = 0; + lowest[KSYN_QUEUE_WRITE] = 0; #if __TESTPANICS__ if (count == 0) @@ -2009,7 +2082,8 @@ kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type /* wakeup readers to upto the writer limits */ int -ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp) +ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, 
int allreaders, + uint32_t updatebits, int *wokenp) { ksyn_queue_t kq; int failedwakeup = 0; @@ -2020,7 +2094,8 @@ ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, ui lbits = updatebits; kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ]; - while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) { + while ((kq->ksynq_count != 0) && + (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) { kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits); if (kret == KERN_NOT_WAITING) { failedwakeup++; @@ -2034,19 +2109,17 @@ ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, ui } -/* This handles the unlock grants for next set on rw_unlock() or on arrival of all preposted waiters */ +/* + * This handles the unlock grants for next set on rw_unlock() or on arrival + * of all preposted waiters. + */ int -kwq_handle_unlock(ksyn_wait_queue_t kwq, - __unused uint32_t mgen, - uint32_t rw_wc, - uint32_t *updatep, - int flags, - int *blockp, - uint32_t premgen) +kwq_handle_unlock(ksyn_wait_queue_t kwq, __unused uint32_t mgen, uint32_t rw_wc, + uint32_t *updatep, int flags, int *blockp, uint32_t premgen) { uint32_t low_writer, limitrdnum; int rwtype, error=0; - int allreaders, failed; + int allreaders, nfailed; uint32_t updatebits=0, numneeded = 0;; int prepost = flags & KW_UNLOCK_PREPOST; thread_t preth = THREAD_NULL; @@ -2067,7 +2140,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ]; kwq->kw_lastseqword = rw_wc; kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK); - kwq->kw_overlapwatch = 0; + kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD; error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest); #if __TESTPANICS__ @@ -2075,7 +2148,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, panic("rwunlock: cannot fails to slot next round of threads"); #endif /* __TESTPANICS__ */ - low_writer = lowest[KSYN_QUEUE_WRITER]; + low_writer = lowest[KSYN_QUEUE_WRITE]; allreaders = 0; updatebits = 0; @@ -2108,7 +2181,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, } else { // no writers at all // no other waiters only readers - kwq->kw_overlapwatch = 1; + kwq->kw_kflags |= KSYN_KWF_OVERLAP_GUARD; numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count; if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) { curthreturns = 1; @@ -2128,18 +2201,19 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, } - failed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, updatebits, &woken); - if (failed != 0) { - kwq->kw_pre_intrcount = failed; /* actually a count */ - kwq->kw_pre_intrseq = limitrdnum; - kwq->kw_pre_intrretbits = updatebits; - kwq->kw_pre_intrtype = PTH_RW_TYPE_READ; + nfailed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, + updatebits, &woken); + if (nfailed != 0) { + _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_READ, nfailed, + limitrdnum, updatebits); } error = 0; - if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0)) + if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) && + ((updatebits & PTH_RWL_WBIT) == 0)) { panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits); + } } break; @@ -2151,7 +2225,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) { block = 0; - if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) { + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) { updatebits |= PTH_RWL_WBIT; } th = preth; @@ -2161,23 +2235,23 
@@ kwq_handle_unlock(ksyn_wait_queue_t kwq, } else { /* we are not granting writelock to the preposting thread */ /* if there are writers present or the preposting write thread then W bit is to be set */ - if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 1 || + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count > 1 || (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) { updatebits |= PTH_RWL_WBIT; } /* setup next in the queue */ - kret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, NULL, updatebits); + kret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, NULL, updatebits); if (kret == KERN_NOT_WAITING) { - kwq->kw_pre_intrcount = 1; /* actually a count */ - kwq->kw_pre_intrseq = low_writer; - kwq->kw_pre_intrretbits = updatebits; - kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE; + _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1, + low_writer, updatebits); } error = 0; } kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits; - if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT)) + if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != + (PTH_RWL_KBIT | PTH_RWL_EBIT)) { panic("kwq_handle_unlock: writer lock granted but no ke set %x\n", updatebits); + } } break; @@ -2204,7 +2278,8 @@ ksyn_queue_init(ksyn_queue_t kq) } int -ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int fit) +ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, + uint32_t mgen, int fit) { ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi]; uint32_t lockseq = mgen & PTHRW_COUNT_MASK; @@ -2229,11 +2304,13 @@ ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint kq->ksynq_lastnum = lockseq; } } else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) { - /* During prepost when a thread is getting cancelled, we could have two with same seq */ + /* During prepost when a thread is getting cancelled, we could have + * two with same seq */ res = EBUSY; if (kwe->kwe_state == KWE_THREAD_PREPOST) { ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq); - if (tmp != NULL && tmp->kwe_uth != NULL && pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) { + if (tmp != NULL && tmp->kwe_uth != NULL && + pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) { TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list); res = 0; } @@ -2267,7 +2344,8 @@ ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint } void -ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe) +ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, + ksyn_waitq_element_t kwe) { if (kq->ksynq_count == 0) { panic("removing item from empty queue"); @@ -2308,7 +2386,8 @@ ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_elemen } ksyn_waitq_element_t -ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq) +ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, + uint32_t seq) { ksyn_waitq_element_t kwe; @@ -2334,7 +2413,8 @@ ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen) result = kwe; // KWE_THREAD_INWAIT must be strictly equal - if (kwe->kwe_state == KWE_THREAD_INWAIT && (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) { + if (kwe->kwe_state == KWE_THREAD_INWAIT && + (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) { result = NULL; } break; @@ -2345,7 +2425,8 @@ ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen) /* look for a thread at lockseq, a */ ksyn_waitq_element_t -ksyn_queue_find_signalseq(__unused 
ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq) +ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, + uint32_t uptoseq, uint32_t signalseq) { ksyn_waitq_element_t result = NULL; ksyn_waitq_element_t q_kwe, r_kwe; @@ -2358,7 +2439,8 @@ ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint3 return result; } } - if (q_kwe->kwe_state == KWE_THREAD_PREPOST || q_kwe->kwe_state == KWE_THREAD_BROADCAST) { + if (q_kwe->kwe_state == KWE_THREAD_PREPOST | + q_kwe->kwe_state == KWE_THREAD_BROADCAST) { /* match any prepost at our same uptoseq or any broadcast above */ if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) { continue; @@ -2399,6 +2481,10 @@ ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all) ksyn_waitq_element_t kwe; uint32_t tseq = upto & PTHRW_COUNT_MASK; ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi]; + uint32_t freed = 0, signaled = 0; + + PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_START, kwq->kw_addr, + kqi, upto, all); while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) { if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) { @@ -2411,17 +2497,28 @@ ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all) * return them as spurious wait so the cvar state gets * reset correctly. */ + + PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe, + kwq->kw_inqueue, 1); /* skip canceled ones */ /* wake the rest */ /* set M bit to indicate to waking CV to retun Inc val */ - (void)ksyn_signal(kwq, kqi, kwe, PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT); + (void)ksyn_signal(kwq, kqi, kwe, + PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT); + signaled++; } else { + PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe, + kwq->kw_inqueue, 2); ksyn_queue_remove_item(kwq, kq, kwe); - pthread_kern->zfree(kwe_zone, kwe); + zfree(kwe_zone, kwe); kwq->kw_fakecount--; + freed++; } } + + PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_END, kwq->kw_addr, freed, + signaled, kwq->kw_inqueue); } /*************************************************************************/ @@ -2483,7 +2580,8 @@ find_nexthighseq(ksyn_wait_queue_t kwq) } int -find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp) +find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, + uint32_t *countp) { int i; uint32_t count = 0; @@ -2540,10 +2638,13 @@ ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep) { ksyn_waitq_element_t kwe, newkwe; uint32_t updatebits = 0; - ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER]; + ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE]; struct ksyn_queue kfreeq; ksyn_queue_init(&kfreeq); + + PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_START, ckwq->kw_addr, upto, + ckwq->kw_inqueue, 0); retry: TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) { @@ -2555,11 +2656,14 @@ retry: if (kwe->kwe_state == KWE_THREAD_INWAIT) { // Wake only non-canceled threads waiting on this CV. 
if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) { - (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT); + PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe, 0, 1); + (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT); updatebits += PTHRW_INC; } } else if (kwe->kwe_state == KWE_THREAD_BROADCAST || kwe->kwe_state == KWE_THREAD_PREPOST) { + PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe, + kwe->kwe_state, 2); ksyn_queue_remove_item(ckwq, kq, kwe); TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list); ckwq->kw_fakecount--; @@ -2571,27 +2675,34 @@ retry: /* Need to enter a broadcast in the queue (if not already at L == S) */ if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) { + PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, ckwq->kw_lword, + ckwq->kw_sword, 3); + newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist); if (newkwe == NULL) { ksyn_wqunlock(ckwq); - newkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone); + newkwe = (ksyn_waitq_element_t)zalloc(kwe_zone); TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list); ksyn_wqlock(ckwq); goto retry; } else { TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list); ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto); + PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, newkwe, 0, 4); } } // free up any remaining things stumbled across above while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) { TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list); - pthread_kern->zfree(kwe_zone, kwe); + zfree(kwe_zone, kwe); } + + PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_END, ckwq->kw_addr, + updatebits, 0, 0); if (updatep != NULL) { - *updatep = updatebits; + *updatep |= updatebits; } } @@ -2601,7 +2712,7 @@ ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits) if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) { if (ckwq->kw_inqueue != 0) { /* FREE THE QUEUE */ - ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 0); + ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 0); #if __TESTPANICS__ if (ckwq->kw_inqueue != 0) panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S"); @@ -2619,8 +2730,10 @@ ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits) void psynch_zoneinit(void) { - kwq_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue"); - kwe_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element"); + kwq_zone = zinit(sizeof(struct ksyn_wait_queue), + 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue"); + kwe_zone = zinit(sizeof(struct ksyn_waitq_element), + 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element"); } void * @@ -2641,13 +2754,14 @@ _pthread_get_thread_kwq(thread_t thread) * to pthread sync objects. */ void -_pthread_find_owner(thread_t thread, struct stackshot_thread_waitinfo * waitinfo) +_pthread_find_owner(thread_t thread, + struct stackshot_thread_waitinfo * waitinfo) { ksyn_wait_queue_t kwq = _pthread_get_thread_kwq(thread); switch (waitinfo->wait_type) { case kThreadWaitPThreadMutex: assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX); - waitinfo->owner = kwq->kw_owner; + waitinfo->owner = thread_tid(kwq->kw_owner); waitinfo->context = kwq->kw_addr; break; /* Owner of rwlock not stored in kernel space due to races. 
Punt diff --git a/kern/kern_trace.h b/kern/kern_trace.h index e65e7b9..2e59edc 100644 --- a/kern/kern_trace.h +++ b/kern/kern_trace.h @@ -39,7 +39,9 @@ // pthread tracing subclasses # define _TRACE_SUB_DEFAULT 0 # define _TRACE_SUB_WORKQUEUE 1 -# define _TRACE_SUB_MUTEX 2 +// WQ_TRACE_REQUESTS_SUBCLASS is 2, in xnu +# define _TRACE_SUB_MUTEX 3 +# define _TRACE_SUB_CONDVAR 4 #ifndef _PTHREAD_BUILDING_CODES_ @@ -62,14 +64,14 @@ VM_UNSLIDE(void* ptr) return (void*)unslid_ptr; } -# define PTHREAD_TRACE(x,a,b,c,d,e) \ - { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(x, a, b, c, d, e); } } +# define PTHREAD_TRACE(x,a,b,c,d) \ + { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, a, b, c, d, 0); } } -# define PTHREAD_TRACE_WQ(x,a,b,c,d,e) \ - { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(x, VM_UNSLIDE(a), b, c, d, e); } } +# define PTHREAD_TRACE_WQ(x,a,b,c,d) \ + { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, VM_UNSLIDE(a), b, c, d, 0); } } # define PTHREAD_TRACE_WQ_REQ(x,a,b,c,d,e) \ - { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(x, VM_UNSLIDE(a), VM_UNSLIDE(b), c, d, e); } } + { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, VM_UNSLIDE(a), VM_UNSLIDE(b), c, d, e); } } #else // KERNEL @@ -138,5 +140,25 @@ TRACE_CODE(psynch_mutex_uunlock, _TRACE_SUB_MUTEX, 0x2); TRACE_CODE(psynch_ksyn_incorrect_owner, _TRACE_SUB_MUTEX, 0x3); TRACE_CODE(psynch_mutex_lock_updatebits, _TRACE_SUB_MUTEX, 0x4); TRACE_CODE(psynch_mutex_unlock_updatebits, _TRACE_SUB_MUTEX, 0x5); +TRACE_CODE(psynch_mutex_clearprepost, _TRACE_SUB_MUTEX, 0x6); +TRACE_CODE(psynch_mutex_kwqallocate, _TRACE_SUB_MUTEX, 0x7); +TRACE_CODE(psynch_mutex_kwqdeallocate, _TRACE_SUB_MUTEX, 0x8); +TRACE_CODE(psynch_mutex_kwqprepost, _TRACE_SUB_MUTEX, 0x9); +TRACE_CODE(psynch_mutex_markprepost, _TRACE_SUB_MUTEX, 0x10); +TRACE_CODE(psynch_mutex_kwqcollision, _TRACE_SUB_MUTEX, 0x11); +TRACE_CODE(psynch_ffmutex_lock_updatebits, _TRACE_SUB_MUTEX, 0x12); +TRACE_CODE(psynch_ffmutex_unlock_updatebits, _TRACE_SUB_MUTEX, 0x13); +TRACE_CODE(psynch_ffmutex_wake, _TRACE_SUB_MUTEX, 0x14); +TRACE_CODE(psynch_mutex_kwqsignal, _TRACE_SUB_MUTEX, 0x15); +TRACE_CODE(psynch_ffmutex_wait, _TRACE_SUB_MUTEX, 0x16); +TRACE_CODE(psynch_mutex_kwqwait, _TRACE_SUB_MUTEX, 0x17); + +TRACE_CODE(psynch_cvar_kwait, _TRACE_SUB_CONDVAR, 0x0); +TRACE_CODE(psynch_cvar_clrprepost, _TRACE_SUB_CONDVAR, 0x1); +TRACE_CODE(psynch_cvar_freeitems, _TRACE_SUB_CONDVAR, 0x2); +TRACE_CODE(psynch_cvar_signal, _TRACE_SUB_CONDVAR, 0x3); +TRACE_CODE(psynch_cvar_broadcast, _TRACE_SUB_CONDVAR, 0x5); +TRACE_CODE(psynch_cvar_zeroed, _TRACE_SUB_CONDVAR, 0x6); +TRACE_CODE(psynch_cvar_updateval, _TRACE_SUB_CONDVAR, 0x7); #endif // _KERN_TRACE_H_ diff --git a/kern/synch_internal.h b/kern/synch_internal.h index 6b22c41..1b9d6c2 100644 --- a/kern/synch_internal.h +++ b/kern/synch_internal.h @@ -24,6 +24,12 @@ #ifndef __SYNCH_INTERNAL_H__ #define __SYNCH_INTERNAL_H__ +// kwe_state +enum { + KWE_THREAD_INWAIT = 1, + KWE_THREAD_PREPOST, + KWE_THREAD_BROADCAST, +}; #define _PTHREAD_MTX_OPT_PSHARED 0x010 #define _PTHREAD_MTX_OPT_NOTIFY 0x1000 /* notify to drop mutex handling in cvwait */ @@ -79,8 +85,13 @@ #define is_rwl_readoverlap(x) (((x) & PTH_RWL_MBIT) != 0) // S word tests -#define is_rws_setseq(x) (((x) & PTH_RWS_SBIT)) -#define is_rws_setunlockinit(x) (((x) & PTH_RWS_IBIT)) +#define is_rws_sbit_set(x) (((x) & PTH_RWS_SBIT) != 0) +#define is_rws_unlockinit_set(x) (((x) & PTH_RWS_IBIT) != 0) +#define is_rws_savemask_set(x) (((x) & PTHRW_RWS_SAVEMASK) 
!= 0) +#define is_rws_pbit_set(x) (((x) & PTH_RWS_CV_PBIT) != 0) + +// kwe_flags +#define KWE_FLAG_LOCKPREPOST 0x1 // cvwait caused a lock prepost static inline int is_seqlower(uint32_t x, uint32_t y) diff --git a/kern/workqueue_internal.h b/kern/workqueue_internal.h index 28d870e..c044fe7 100644 --- a/kern/workqueue_internal.h +++ b/kern/workqueue_internal.h @@ -33,175 +33,18 @@ * duplicate definitions that used to exist in both projects, when separate. */ -/* workq_kernreturn commands */ -#define WQOPS_THREAD_RETURN 0x04 /* parks the thread back into the kernel */ -#define WQOPS_QUEUE_NEWSPISUPP 0x10 /* this is to check for newer SPI support */ -#define WQOPS_QUEUE_REQTHREADS 0x20 /* request number of threads of a prio */ -#define WQOPS_QUEUE_REQTHREADS2 0x30 /* request a number of threads in a given priority bucket */ -#define WQOPS_THREAD_KEVENT_RETURN 0x40 /* parks the thread after delivering the passed kevent array */ -#define WQOPS_SET_EVENT_MANAGER_PRIORITY 0x80 /* max() in the provided priority in the the priority of the event manager */ -#define WQOPS_THREAD_WORKLOOP_RETURN 0x100 /* parks the thread after delivering the passed kevent array */ -#define WQOPS_SHOULD_NARROW 0x200 /* checks whether we should narrow our concurrency */ - -/* flag values for upcall flags field, only 8 bits per struct threadlist */ -#define WQ_FLAG_THREAD_PRIOMASK 0x0000ffff -#define WQ_FLAG_THREAD_PRIOSHIFT 16 -#define WQ_FLAG_THREAD_OVERCOMMIT 0x00010000 /* thread is with overcommit prio */ -#define WQ_FLAG_THREAD_REUSE 0x00020000 /* thread is being reused */ -#define WQ_FLAG_THREAD_NEWSPI 0x00040000 /* the call is with new SPIs */ -#define WQ_FLAG_THREAD_KEVENT 0x00080000 /* thread is response to kevent req */ -#define WQ_FLAG_THREAD_EVENT_MANAGER 0x00100000 /* event manager thread */ -#define WQ_FLAG_THREAD_TSD_BASE_SET 0x00200000 /* tsd base has already been set */ -#define WQ_FLAG_THREAD_WORKLOOP 0x00400000 /* workloop thread */ - -#define WQ_THREAD_CLEANUP_QOS QOS_CLASS_DEFAULT - -#define WQ_KEVENT_LIST_LEN 16 // WORKQ_KEVENT_EVENT_BUFFER_LEN -#define WQ_KEVENT_DATA_SIZE (32 * 1024) - -/* These definitions are only available to the kext, to avoid bleeding constants and types across the boundary to - * the userspace library. - */ -#ifdef KERNEL - -/* These defines come from kern/thread.h but are XNU_KERNEL_PRIVATE so do not get - * exported to kernel extensions. - */ -#define SCHED_CALL_BLOCK 0x1 -#define SCHED_CALL_UNBLOCK 0x2 - -// kwe_state -enum { - KWE_THREAD_INWAIT = 1, - KWE_THREAD_PREPOST, - KWE_THREAD_BROADCAST, -}; - -/* old workq priority scheme */ - -#define WORKQUEUE_HIGH_PRIOQUEUE 0 /* high priority queue */ -#define WORKQUEUE_DEFAULT_PRIOQUEUE 1 /* default priority queue */ -#define WORKQUEUE_LOW_PRIOQUEUE 2 /* low priority queue */ -#define WORKQUEUE_BG_PRIOQUEUE 3 /* background priority queue */ - -#define WORKQUEUE_NUM_BUCKETS 7 - // Sometimes something gets passed a bucket number and we need a way to express -// that it's actually the event manager. Use the (n+1)th bucket for that. -#define WORKQUEUE_EVENT_MANAGER_BUCKET (WORKQUEUE_NUM_BUCKETS-1) - -/* wq_max_constrained_threads = max(64, N_CPU * WORKQUEUE_CONSTRAINED_FACTOR) - * This used to be WORKQUEUE_NUM_BUCKETS + 1 when NUM_BUCKETS was 4, yielding - * N_CPU * 5. When NUM_BUCKETS changed, we decided that the limit should - * not change. So the factor is now always 5. 
- */ -#define WORKQUEUE_CONSTRAINED_FACTOR 5 - -#define WORKQUEUE_OVERCOMMIT 0x10000 - -/* - * A thread which is scheduled may read its own th_priority field without - * taking the workqueue lock. Other fields should be assumed to require the - * lock. - */ -struct threadlist { - TAILQ_ENTRY(threadlist) th_entry; - thread_t th_thread; - struct workqueue *th_workq; - mach_vm_offset_t th_stackaddr; - mach_port_name_t th_thport; - uint16_t th_flags; - uint8_t th_upcall_flags; - uint8_t th_priority; -}; - -#define TH_LIST_INITED 0x0001 /* Set at thread creation. */ -#define TH_LIST_RUNNING 0x0002 /* On thrunlist, not parked. */ -#define TH_LIST_KEVENT 0x0004 /* Thread requested by kevent */ -#define TH_LIST_NEW 0x0008 /* First return to userspace */ -#define TH_LIST_BUSY 0x0010 /* Removed from idle list but not ready yet. */ -#define TH_LIST_KEVENT_BOUND 0x0020 /* Thread bound to kqueues */ -#define TH_LIST_CONSTRAINED 0x0040 /* Non-overcommit thread. */ -#define TH_LIST_EVENT_MGR_SCHED_PRI 0x0080 /* Non-QoS Event Manager */ -#define TH_LIST_UNBINDING 0x0100 /* Thread is unbinding during park */ -#define TH_LIST_REMOVING_VOUCHER 0x0200 /* Thread is removing its voucher */ -#define TH_LIST_PACING 0x0400 /* Thread is participating in pacing */ - -struct threadreq { - TAILQ_ENTRY(threadreq) tr_entry; - uint16_t tr_flags; - uint8_t tr_state; - uint8_t tr_priority; -}; -TAILQ_HEAD(threadreq_head, threadreq); - -#define TR_STATE_NEW 0 /* Not yet enqueued */ -#define TR_STATE_WAITING 1 /* Waiting to be serviced - on reqlist */ -#define TR_STATE_COMPLETE 2 /* Request handled - for caller to free */ -#define TR_STATE_DEAD 3 - -#define TR_FLAG_KEVENT 0x01 -#define TR_FLAG_OVERCOMMIT 0x02 -#define TR_FLAG_ONSTACK 0x04 -#define TR_FLAG_WORKLOOP 0x08 -#define TR_FLAG_NO_PACING 0x10 - -#if defined(__LP64__) -typedef unsigned __int128 wq_thactive_t; -#else -typedef uint64_t wq_thactive_t; -#endif - -struct workqueue { - proc_t wq_proc; - vm_map_t wq_map; - task_t wq_task; - - lck_spin_t wq_lock; - - thread_call_t wq_atimer_delayed_call; - thread_call_t wq_atimer_immediate_call; - - uint32_t _Atomic wq_flags; - uint32_t wq_timer_interval; - uint32_t wq_threads_scheduled; - uint32_t wq_constrained_threads_scheduled; - uint32_t wq_nthreads; - uint32_t wq_thidlecount; - uint32_t wq_event_manager_priority; - uint8_t wq_lflags; // protected by wqueue lock - uint8_t wq_paced; // protected by wqueue lock - uint16_t __wq_unused; - - TAILQ_HEAD(, threadlist) wq_thrunlist; - TAILQ_HEAD(, threadlist) wq_thidlelist; - TAILQ_HEAD(, threadlist) wq_thidlemgrlist; - - uint32_t wq_reqcount; /* number of elements on the following lists */ - struct threadreq_head wq_overcommit_reqlist[WORKQUEUE_EVENT_MANAGER_BUCKET]; - struct threadreq_head wq_reqlist[WORKQUEUE_EVENT_MANAGER_BUCKET]; - struct threadreq wq_event_manager_threadreq; - - struct threadreq *wq_cached_threadreq; - - uint16_t wq_thscheduled_count[WORKQUEUE_NUM_BUCKETS]; - _Atomic wq_thactive_t wq_thactive; - _Atomic uint64_t wq_lastblocked_ts[WORKQUEUE_NUM_BUCKETS]; -}; -#define WQ_EXITING 0x01 -#define WQ_ATIMER_DELAYED_RUNNING 0x02 -#define WQ_ATIMER_IMMEDIATE_RUNNING 0x04 - -#define WQL_ATIMER_BUSY 0x01 -#define WQL_ATIMER_WAITING 0x02 - -#define WORKQUEUE_MAXTHREADS 512 -#define WQ_STALLED_WINDOW_USECS 200 -#define WQ_REDUCE_POOL_WINDOW_USECS 5000000 -#define WQ_MAX_TIMER_INTERVAL_USECS 50000 - -#define WQ_THREADLIST_EXITING_POISON (void *)~0ul - -#endif // KERNEL +// that it's actually the event manager. Use the (0)th bucket for that. 
+#define WORKQ_THREAD_QOS_MIN (THREAD_QOS_MAINTENANCE) +#define WORKQ_THREAD_QOS_MAX (THREAD_QOS_LAST - 1) +#define WORKQ_THREAD_QOS_CLEANUP (THREAD_QOS_LEGACY) +#define WORKQ_THREAD_QOS_MANAGER (THREAD_QOS_LAST) // outside of MIN/MAX + +#define WORKQ_NUM_QOS_BUCKETS (WORKQ_THREAD_QOS_MAX) +#define WORKQ_NUM_BUCKETS (WORKQ_THREAD_QOS_MAX + 1) +#define WORKQ_IDX(qos) ((qos) - 1) // 0 based index + +// magical `nkevents` values for _pthread_wqthread +#define WORKQ_EXIT_THREAD_NKEVENT (-1) #endif // _WORKQUEUE_INTERNAL_H_ diff --git a/libpthread.xcodeproj/project.pbxproj b/libpthread.xcodeproj/project.pbxproj index 33df537..1c4fd1a 100644 --- a/libpthread.xcodeproj/project.pbxproj +++ b/libpthread.xcodeproj/project.pbxproj @@ -62,6 +62,20 @@ /* End PBXAggregateTarget section */ /* Begin PBXBuildFile section */ + 6E2A3BBE2101222F0003B53B /* stack_np.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E2A3BBD210122230003B53B /* stack_np.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 6E2A3BBF210122300003B53B /* stack_np.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E2A3BBD210122230003B53B /* stack_np.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 6E2A3BC0210122340003B53B /* stack_np.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E2A3BBD210122230003B53B /* stack_np.h */; }; + 6E5869C720C9040A00F1CB75 /* dependency_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5869C620C8FE8300F1CB75 /* dependency_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; + 6E5869C820C9040B00F1CB75 /* dependency_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5869C620C8FE8300F1CB75 /* dependency_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; + 6E5869C920C9040C00F1CB75 /* dependency_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5869C620C8FE8300F1CB75 /* dependency_private.h */; }; + 6E5869CB20C9043200F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869CC20C9043B00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869CD20C9043B00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869CE20C9043C00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869CF20C9043C00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869D020C9043D00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869D120C9043D00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869D220C9043E00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; 6E8C16541B14F08A00C8987C /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EB232C91B0EB29D005915CE /* resolver.c */; }; 6E8C16551B14F08A00C8987C /* pthread.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325FA15B7513200270056 /* pthread.c */; }; 6E8C16561B14F08A00C8987C /* pthread_cancelable.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325F115B7513200270056 /* pthread_cancelable.c */; }; @@ -166,7 +180,6 @@ C9A1BF5015C9A59B006BB313 /* sched.h in 
Headers */ = {isa = PBXBuildFile; fileRef = C9A3260115B7513700270056 /* sched.h */; settings = {ATTRIBUTES = (Public, ); }; }; C9A1BF5315C9A9F5006BB313 /* pthread_cancelable_cancel.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A1BF5215C9A9F5006BB313 /* pthread_cancelable_cancel.c */; }; C9A1BF5515C9CB9D006BB313 /* pthread_cancelable_legacy.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A1BF5415C9CB9D006BB313 /* pthread_cancelable_legacy.c */; }; - C9A960B0183EB42700AE10C8 /* kern_policy.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A960AF183EB42700AE10C8 /* kern_policy.c */; }; C9BB478B15E6ABD900F135B7 /* workqueue_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325F915B7513200270056 /* workqueue_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; C9BB478D15E6ADF700F135B7 /* tsd_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325F415B7513200270056 /* tsd_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; C9CCFB9D18B6D0910060CAAE /* qos_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C99B17DA189C2E1B00991D38 /* qos_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; @@ -334,11 +347,15 @@ containerPortal = C9A325D915B7347000270056 /* Project object */; proxyType = 1; remoteGlobalIDString = E4F4498C1E82C1F000A7FB9A; - remoteInfo = "libpthread alt resolved"; + remoteInfo = "libpthread armv81 resolved"; }; /* End PBXContainerItemProxy section */ /* Begin PBXFileReference section */ + 6E2A3BBD210122230003B53B /* stack_np.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stack_np.h; sourceTree = ""; }; + 6E514A0220B67C0900844EE1 /* offsets.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = offsets.h; sourceTree = ""; }; + 6E5869C620C8FE8300F1CB75 /* dependency_private.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = dependency_private.h; sourceTree = ""; }; + 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = pthread_dependency.c; sourceTree = ""; }; 6E8C16801B14F08A00C8987C /* libsystem_pthread.dylib */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.dylib"; includeInIndex = 0; path = libsystem_pthread.dylib; sourceTree = BUILT_PRODUCTS_DIR; }; 6E8C16851B14F14000C8987C /* pthread_introspection.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = pthread_introspection.xcconfig; sourceTree = ""; }; 6EB232C91B0EB29D005915CE /* resolver.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = resolver.c; sourceTree = ""; }; @@ -444,7 +461,6 @@ C9A3260015B7513700270056 /* pthread_spis.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = pthread_spis.h; sourceTree = ""; }; C9A3260115B7513700270056 /* sched.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sched.h; sourceTree = ""; }; C9A3260C15B759B600270056 /* pthread.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = pthread.xcconfig; sourceTree = ""; }; - C9A960AF183EB42700AE10C8 /* kern_policy.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kern_policy.c; sourceTree = ""; }; C9A960B318452B2F00AE10C8 /* pthread.py */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.python; path = pthread.py; sourceTree = ""; }; C9A960B618452CDD00AE10C8 /* install-lldbmacros.sh */ 
= {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "install-lldbmacros.sh"; sourceTree = ""; }; C9C2212D15FA978D00447568 /* pthread.aliases */ = {isa = PBXFileReference; lastKnownFileType = text; path = pthread.aliases; sourceTree = ""; }; @@ -460,7 +476,7 @@ E4943AA71E80BD8400D2A961 /* resolver_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = resolver_internal.h; sourceTree = ""; }; E4D962F919086AD600E8A9F2 /* qos.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = qos.h; sourceTree = ""; }; E4D962FC19086C5700E8A9F2 /* install-sys-headers.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; path = "install-sys-headers.sh"; sourceTree = ""; }; - E4F449A01E82C1F000A7FB9A /* libpthread_alt.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libpthread_alt.a; sourceTree = BUILT_PRODUCTS_DIR; }; + E4F449A01E82C1F000A7FB9A /* libpthread_armv81.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libpthread_armv81.a; sourceTree = BUILT_PRODUCTS_DIR; }; E4F449A31E82CF0100A7FB9A /* resolver.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = resolver.xcconfig; sourceTree = ""; }; E4F449D41E82D03500A7FB9A /* libsystem_pthread.dylib */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.dylib"; includeInIndex = 0; path = libsystem_pthread.dylib; sourceTree = BUILT_PRODUCTS_DIR; }; FC30E28D16A747AD00A25B5F /* synch_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = synch_internal.h; sourceTree = ""; }; @@ -547,7 +563,6 @@ C9D9E8FE1626248800448CED /* pthread-Info.plist */, C9C533841607C928009988FA /* kern_internal.h */, C9169DDF1603DF9B005A2F8C /* kern_init.c */, - C9A960AF183EB42700AE10C8 /* kern_policy.c */, C9169DDB1603DE84005A2F8C /* kern_synch.c */, C9169DDC1603DE84005A2F8C /* kern_support.c */, C979E9FB18A1BC2A000951E5 /* kern_trace.h */, @@ -656,7 +671,7 @@ 6E8C16801B14F08A00C8987C /* libsystem_pthread.dylib */, C04545B81C584F4A006A53B3 /* libpthread.a */, E41505E71E818BEB00F243FB /* libpthread_mp.a */, - E4F449A01E82C1F000A7FB9A /* libpthread_alt.a */, + E4F449A01E82C1F000A7FB9A /* libpthread_armv81.a */, E4F449D41E82D03500A7FB9A /* libsystem_pthread.dylib */, ); name = Products; @@ -665,6 +680,7 @@ C9A325ED15B74FB600270056 /* src */ = { isa = PBXGroup; children = ( + 6E514A0220B67C0900844EE1 /* offsets.h */, C9A325F315B7513200270056 /* internal.h */, C9A325EF15B7513200270056 /* plockstat.d */, C9A325FA15B7513200270056 /* pthread.c */, @@ -674,6 +690,7 @@ C9A325F215B7513200270056 /* pthread_cond.c */, 924D8EDE1C11832A002AC2BC /* pthread_cwd.c */, C9A325F515B7513200270056 /* pthread_mutex.c */, + 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */, C9A325F615B7513200270056 /* pthread_rwlock.c */, C975D5DC15C9D16B0098ECD8 /* pthread_support.c */, C9A325F815B7513200270056 /* pthread_tsd.c */, @@ -694,6 +711,7 @@ C9A3260015B7513700270056 /* pthread_spis.h */, C9A3260115B7513700270056 /* sched.h */, C98C95D818FF1F4E005654FB /* spawn.h */, + 6E2A3BBD210122230003B53B /* stack_np.h */, C9244C1A185FCFED00075748 /* qos.h */, ); path = pthread; @@ -756,6 +774,7 @@ E4657D4017284F7B007D1847 /* introspection_private.h */, C99B17DA189C2E1B00991D38 /* qos_private.h */, E4063CF21906B4FB000202F9 /* qos.h */, + 6E5869C620C8FE8300F1CB75 /* dependency_private.h */, ); path = private; sourceTree = ""; @@ 
-805,6 +824,7 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( + 6E2A3BC0210122340003B53B /* stack_np.h in Headers */, 6E8C16711B14F08A00C8987C /* posix_sched.h in Headers */, 6E8C166F1B14F08A00C8987C /* introspection_private.h in Headers */, E41A64AE1E83C470009479A9 /* introspection.h in Headers */, @@ -819,6 +839,7 @@ 6E8C166B1B14F08A00C8987C /* pthread_impl.h in Headers */, 6E8C166D1B14F08A00C8987C /* pthread_spis.h in Headers */, 6E8C166E1B14F08A00C8987C /* sched.h in Headers */, + 6E5869C920C9040C00F1CB75 /* dependency_private.h in Headers */, 6E8C16751B14F08A00C8987C /* spawn.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; @@ -848,6 +869,7 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( + 6E2A3BBE2101222F0003B53B /* stack_np.h in Headers */, C9244C1B185FD33000075748 /* qos.h in Headers */, C9A1BF4D15C9A58E006BB313 /* pthread.h in Headers */, C9A1BF4E15C9A594006BB313 /* pthread_impl.h in Headers */, @@ -862,6 +884,7 @@ C98C95D918FF1F4E005654FB /* spawn.h in Headers */, C99AD87C15DEC5290009A6F8 /* spinlock_private.h in Headers */, C9BB478B15E6ABD900F135B7 /* workqueue_private.h in Headers */, + 6E5869C720C9040A00F1CB75 /* dependency_private.h in Headers */, C9153096167ACC2B006BB094 /* private.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; @@ -877,6 +900,7 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( + 6E2A3BBF210122300003B53B /* stack_np.h in Headers */, E4F449BE1E82D03500A7FB9A /* qos.h in Headers */, E4F449BF1E82D03500A7FB9A /* pthread.h in Headers */, E4F449C01E82D03500A7FB9A /* pthread_impl.h in Headers */, @@ -891,6 +915,7 @@ E4F449C91E82D03500A7FB9A /* spawn.h in Headers */, E4F449CA1E82D03500A7FB9A /* spinlock_private.h in Headers */, E4F449CB1E82D03500A7FB9A /* workqueue_private.h in Headers */, + 6E5869C820C9040B00F1CB75 /* dependency_private.h in Headers */, E4F449CC1E82D03500A7FB9A /* private.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; @@ -1044,9 +1069,9 @@ productReference = E41505E71E818BEB00F243FB /* libpthread_mp.a */; productType = "com.apple.product-type.library.static"; }; - E4F4498C1E82C1F000A7FB9A /* libpthread alt resolved */ = { + E4F4498C1E82C1F000A7FB9A /* libpthread armv81 resolved */ = { isa = PBXNativeTarget; - buildConfigurationList = E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread alt resolved" */; + buildConfigurationList = E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread armv81 resolved" */; buildPhases = ( E4F4498D1E82C1F000A7FB9A /* Sources */, E4F4499C1E82C1F000A7FB9A /* Symlink normal variant */, @@ -1055,9 +1080,9 @@ ); dependencies = ( ); - name = "libpthread alt resolved"; - productName = libpthread_alt.a; - productReference = E4F449A01E82C1F000A7FB9A /* libpthread_alt.a */; + name = "libpthread armv81 resolved"; + productName = libpthread_armv81.a; + productReference = E4F449A01E82C1F000A7FB9A /* libpthread_armv81.a */; productType = "com.apple.product-type.library.static"; }; E4F449A41E82D03500A7FB9A /* libsystem_pthread noresolver */ = { @@ -1118,7 +1143,7 @@ E4F449A41E82D03500A7FB9A /* libsystem_pthread noresolver */, 6E8C16511B14F08A00C8987C /* libsystem_pthread introspection */, E41505D01E818BEB00F243FB /* libpthread mp resolved */, - E4F4498C1E82C1F000A7FB9A /* libpthread alt resolved */, + E4F4498C1E82C1F000A7FB9A /* libpthread armv81 resolved */, C04545A21C584F4A006A53B3 /* libpthread generic */, C90E7A9E15DC3C3800A06D48 /* libpthread dyld */, 
74E594911613AAF4006C417B /* libpthread eOS */, @@ -1389,6 +1414,7 @@ 6E8C16631B14F08A00C8987C /* pthread_support.c in Sources */, 6E8C16641B14F08A00C8987C /* thread_setup.c in Sources */, 6E8C16651B14F08A00C8987C /* pthread_atfork.c in Sources */, + 6E5869CD20C9043B00F1CB75 /* pthread_dependency.c in Sources */, 6E8C16661B14F08A00C8987C /* pthread_asm.s in Sources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -1400,6 +1426,7 @@ 6EB232D01B0EB325005915CE /* resolver.c in Sources */, 74E594931613AAF4006C417B /* pthread.c in Sources */, 74E594941613AAF4006C417B /* pthread_cancelable.c in Sources */, + 6E5869D220C9043E00F1CB75 /* pthread_dependency.c in Sources */, 74E594A61613AB10006C417B /* pthread_cancelable_cancel.c in Sources */, 74E594951613AAF4006C417B /* pthread_cond.c in Sources */, 74E594961613AAF4006C417B /* pthread_mutex.c in Sources */, @@ -1421,6 +1448,7 @@ C04545A41C584F4A006A53B3 /* resolver.c in Sources */, C04545A51C584F4A006A53B3 /* pthread.c in Sources */, C04545A61C584F4A006A53B3 /* pthread_cancelable.c in Sources */, + 6E5869D020C9043D00F1CB75 /* pthread_dependency.c in Sources */, C04545A71C584F4A006A53B3 /* pthread_cancelable_cancel.c in Sources */, C04545A81C584F4A006A53B3 /* pthread_cond.c in Sources */, C04545A91C584F4A006A53B3 /* pthread_mutex.c in Sources */, @@ -1445,6 +1473,7 @@ C90E7AA515DC3C9D00A06D48 /* pthread_cancelable.c in Sources */, C90E7AA615DC3C9D00A06D48 /* pthread_cond.c in Sources */, C90E7AA715DC3C9D00A06D48 /* pthread_mutex.c in Sources */, + 6E5869D120C9043D00F1CB75 /* pthread_dependency.c in Sources */, C90E7AA815DC3C9D00A06D48 /* pthread_rwlock.c in Sources */, C90E7AA915DC3C9D00A06D48 /* pthread_support.c in Sources */, C90E7AAA15DC3C9D00A06D48 /* pthread_tsd.c in Sources */, @@ -1476,6 +1505,7 @@ C975D5DD15C9D16B0098ECD8 /* pthread_support.c in Sources */, C948FCF715D1D1E100180BF5 /* thread_setup.c in Sources */, C90E7AB815DC40D900A06D48 /* pthread_atfork.c in Sources */, + 6E5869CB20C9043200F1CB75 /* pthread_dependency.c in Sources */, C99AD88015E2D8B50009A6F8 /* pthread_asm.s in Sources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -1485,7 +1515,6 @@ buildActionMask = 2147483647; files = ( C9169DDE1603DE84005A2F8C /* kern_support.c in Sources */, - C9A960B0183EB42700AE10C8 /* kern_policy.c in Sources */, C9169DE01603DF9B005A2F8C /* kern_init.c in Sources */, C9D75E4216127B3900C2FB26 /* kern_synch.c in Sources */, ); @@ -1498,6 +1527,7 @@ E41505D21E818BEB00F243FB /* resolver.c in Sources */, E41505D31E818BEB00F243FB /* pthread.c in Sources */, E41505D41E818BEB00F243FB /* pthread_cancelable.c in Sources */, + 6E5869CE20C9043C00F1CB75 /* pthread_dependency.c in Sources */, E41505D51E818BEB00F243FB /* pthread_cancelable_cancel.c in Sources */, E41505D61E818BEB00F243FB /* pthread_cond.c in Sources */, E41505D71E818BEB00F243FB /* pthread_mutex.c in Sources */, @@ -1519,6 +1549,7 @@ E4F4498E1E82C1F000A7FB9A /* resolver.c in Sources */, E4F4498F1E82C1F000A7FB9A /* pthread.c in Sources */, E4F449901E82C1F000A7FB9A /* pthread_cancelable.c in Sources */, + 6E5869CF20C9043C00F1CB75 /* pthread_dependency.c in Sources */, E4F449911E82C1F000A7FB9A /* pthread_cancelable_cancel.c in Sources */, E4F449921E82C1F000A7FB9A /* pthread_cond.c in Sources */, E4F449931E82C1F000A7FB9A /* pthread_mutex.c in Sources */, @@ -1554,6 +1585,7 @@ E4F449B81E82D03500A7FB9A /* pthread_support.c in Sources */, E4F449B91E82D03500A7FB9A /* thread_setup.c in Sources */, E4F449BA1E82D03500A7FB9A /* pthread_atfork.c in Sources */, + 6E5869CC20C9043B00F1CB75 /* 
pthread_dependency.c in Sources */, E4F449BB1E82D03500A7FB9A /* pthread_asm.s in Sources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -1628,7 +1660,7 @@ }; E4F449A21E82C5A400A7FB9A /* PBXTargetDependency */ = { isa = PBXTargetDependency; - target = E4F4498C1E82C1F000A7FB9A /* libpthread alt resolved */; + target = E4F4498C1E82C1F000A7FB9A /* libpthread armv81 resolved */; targetProxy = E4F449A11E82C5A400A7FB9A /* PBXContainerItemProxy */; }; /* End PBXTargetDependency section */ @@ -1832,7 +1864,7 @@ isa = XCBuildConfiguration; baseConfigurationReference = E41505E81E818D4D00F243FB /* resolved.xcconfig */; buildSettings = { - RESOLVED_VARIANT = alt; + RESOLVED_VARIANT = armv81; }; name = Release; }; @@ -1840,7 +1872,7 @@ isa = XCBuildConfiguration; baseConfigurationReference = E41505E81E818D4D00F243FB /* resolved.xcconfig */; buildSettings = { - RESOLVED_VARIANT = alt; + RESOLVED_VARIANT = armv81; }; name = Debug; }; @@ -1987,7 +2019,7 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; - E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread alt resolved" */ = { + E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread armv81 resolved" */ = { isa = XCConfigurationList; buildConfigurations = ( E4F4499E1E82C1F000A7FB9A /* Release */, diff --git a/lldbmacros/init.py b/lldbmacros/init.py new file mode 100644 index 0000000..af7fe69 --- /dev/null +++ b/lldbmacros/init.py @@ -0,0 +1,310 @@ +from xnu import * +import struct + +def GetSeqCount(seq): + return (seq >> 8) + +def GetLSeqBits(seq): + rv = "" + if seq & 0x1: + rv += "K" + if seq & 0x2: + rv += "E" + if seq & 0x4: + rv += "W" + if seq & 0x20: + rv += "M" + if seq & 0x40: + rv += "U" + if seq & 0x80: + rv += "I" + return rv + +def GetSSeqBits(seq): + rv = "" + if seq & 0x1: + rv += "S" + if seq & 0x2: + rv += "I" + if seq & 0x4: + rv += "Ws" + return rv + +def GetLSeqSummary(seq): + return "{:d} {:s}".format(GetSeqCount(seq), GetLSeqBits(seq)) + +def GetSSeqSummary(seq): + return "{:d} {:s}".format(GetSeqCount(seq), GetSSeqBits(seq)) + +@header("{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}".format('sig', 'tid', 'options', 'lseq', 'useq')) +def GetUserMutexSummary(task, uaddr): + if int(task.t_flags) & 0x1: + mtxlayout = "QIIhhIQIII" + padoffset = 1 + else: + mtxlayout = "QIIhhQIII" + padoffset = 0 + + data = GetUserDataAsString(task, unsigned(uaddr), struct.calcsize(mtxlayout)) + info = struct.unpack(mtxlayout, data) + + format = "{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}" + sigstr = str("{0: <#020x}".format(info[0])) + + # the options field dictates whether we were created misaligned + if info[2] & 0x800: + lseq = info[7+padoffset] + useq = info[8+padoffset] + else: + lseq = info[6+padoffset] + useq = info[7+padoffset] + + return format.format(sigstr, hex(info[5+padoffset]), hex(info[2]), hex(lseq), hex(useq)) + +@lldb_command('showusermutex') +def PthreadShowUserMutex(cmd_args=None): + """ + display information about a userspace mutex at a given address + Syntax: (lldb) showusermutex + """ + if not cmd_args: + raise ArgumentError("No arguments passed") + task = kern.GetValueFromAddress(cmd_args[0], "task_t") + uaddr = kern.GetValueFromAddress(cmd_args[1], "user_addr_t") + + print GetUserMutexSummary.header + print GetUserMutexSummary(task, uaddr) + +@lldb_type_summary(['ksyn_wait_queue *', 'ksyn_wait_queue_t']) +@header("{:<20s} {:<20s} {:<10s} {:<6s} {:<6s} {:<8s} {:<8s} {:<8s} {:<8s}".format('kwq', 'uaddr', 'type', 'pflags', 'kflags', 
'refs', 'indrop', 'waiters', 'preposts')) +def GetKwqSummary(kwq): + format = "{:<#20x} {:<#20x} {:<10s} {:<6s} {:<6s} {:<8d} {:<8d} {:<8d} {:<8d}\n" + kwq = Cast(kwq, "ksyn_wait_queue_t") + + kwqtype = "" + if kwq.kw_type & 0xff == 0x01: + kwqtype = "mtx" + if kwq.kw_type & 0xff == 0x02: + kwqtype = "cvar" + if kwq.kw_type & 0xff == 0x04: + kwqtype = "rwl" + if kwq.kw_type & 0xff == 0x05: + kwqtype = "sema" + + if kwq.kw_type & 0x1000 == 0x1000: + kwqtype += "W" # INWAIT + if kwq.kw_type & 0x2000 == 0x2000: + kwqtype += "D" # INDROP + + pflags = "" + if kwq.kw_pflags & 0x2: + pflags += "H" # INHASH + if kwq.kw_pflags & 0x4: + pflags += "S" # SHARED + if kwq.kw_pflags & 0x8: + pflags += "W" # WAITING + if kwq.kw_pflags & 0x10: + pflags += "F" # FREELIST + + kflags = "" + if kwq.kw_kflags & 0x1: + kflags += "C" # INITCLEARED + if kwq.kw_kflags & 0x2: + kflags += "Z" # ZEROED + if kwq.kw_kflags & 0x4: + kflags += "Q" # QOS APPLIED + if kwq.kw_kflags & 0x8: + kflags += "O" # OVERLAP + + rs = format.format(kwq, kwq.kw_addr, kwqtype, pflags, kflags, kwq.kw_iocount, kwq.kw_dropcount, kwq.kw_inqueue, kwq.kw_fakecount) + + rs += "\t{:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s}\n".format('lowest', 'highest', 'lword', 'uword', 'sword', 'last', 'next') + rs += "\t{:<10d} {:<10d} {:<10s} {:<10d} {:<10s} {:<10s} {:<10s}\n".format( + GetSeqCount(kwq.kw_lowseq), GetSeqCount(kwq.kw_highseq), + GetLSeqSummary(kwq.kw_lword), GetSeqCount(kwq.kw_uword), + GetSSeqSummary(kwq.kw_sword), GetSSeqSummary(kwq.kw_lastseqword), + GetSSeqSummary(kwq.kw_nextseqword)) + + rs += "\t{:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s}\n".format( + 'pposts', 'lseq', 'sseq', 'intr', 'count', 'seq', 'bits') + + intr_type = "NONE" + if kwq.kw_intr.type == 0x1: + intr_type = "READ" + elif kwq.kw_intr.type == 0x2: + intr_type = "WRITE" + + rs += "\t{:<10d} {:<10s} {:<10s} {:<10s} {:<10d} {:<10s} {:<10s}\n".format( + kwq.kw_prepost.count, + GetLSeqSummary(kwq.kw_prepost.lseq), GetSSeqSummary(kwq.kw_prepost.sseq), + intr_type, kwq.kw_intr.count, + GetSSeqSummary(kwq.kw_intr.seq), GetSSeqSummary(kwq.kw_intr.returnbits)) + + rs += "\twaiting readers:\n" + for kwe in IterateTAILQ_HEAD(kwq.kw_ksynqueues[0].ksynq_kwelist, 'kwe_list'): + rs += "\t" + GetKweSummary.header + "\n" + rs += "\t" + GetKweSummary(kwe) + "\n" + + rs += "\twaiting writers:\n" + for kwe in IterateTAILQ_HEAD(kwq.kw_ksynqueues[1].ksynq_kwelist, 'kwe_list'): + rs += "\t" + GetKweSummary.header + "\n" + rs += "\t" + GetKweSummary(kwe) + "\n" + + if kwq.kw_turnstile: + rs += GetTurnstileSummary.header + "\n" + rs += GetTurnstileSummary(Cast(kwq.kw_turnstile, "struct turnstile *")) + + return rs + +@lldb_type_summary(['ksyn_waitq_element *', 'ksyn_waitq_element_t']) +@header("{:<20s} {:<20s} {:<10s} {:<10s} {:<20s} {:<20s}".format('kwe', 'kwq', 'lseq', 'state', 'uthread', 'thread')) +def GetKweSummary(kwe): + format = "{:<#20x} {:<#20x} {:<10s} {:<10s} {:<#20x} {:<#20x}" + kwe = Cast(kwe, 'struct ksyn_waitq_element *') + state = "" + if kwe.kwe_state == 1: + state = "INWAIT" + elif kwe.kwe_state == 2: + state = "PPOST" + elif kwe.kwe_state == 3: + state = "BROAD" + else: + state = "{:#10x}".format(kwe.kwe_state) + return format.format(kwe, kwe.kwe_kwqqueue, GetLSeqSummary(kwe.kwe_lockseq), state, kwe.kwe_uth, kwe.kwe_thread) + +@header("{0: <24s} {1: <24s} {2: <24s}".format('thread', 'thread_id', 'uthread')) +def GetPthreadSummary(thread): + format = "{0: <24s} {1: <24s} {2: <24s}" + + threadstr = str("{0: <#020x}".format(thread)) + if 
int(thread.static_param): + threadstr += "[WQ]" + + uthread = Cast(thread.uthread, "uthread_t") + uthreadstr = str("{0: <#020x}".format(uthread)) + + + return format.format(threadstr, hex(thread.thread_id), uthreadstr) + +@header("{0: <24s} {1: <24s} {2: <10s} {3: <10s} {4: <10s} {5: <10s} {6: <10s}".format('proc', 'wq', 'sched', 'req', 'idle', 'wq_flags', 'wq_lflags')) +def GetPthreadWorkqueueSummary(wq): + format = "{0: <24s} {1: <24s} {2: <10d} {3: <10d} {4: <10d} {5: <10s} {6: <10s}" + procstr = str("{0: <#020x}".format(wq.wq_proc)) + wqstr = str("{0: <#020x}".format(wq)) + + flags = [] + if wq.wq_flags & 0x1: + flags.append("I") + if wq.wq_flags & 0x2: + flags.append("R") + if wq.wq_flags & 0x4: + flags.append("E") + + wqflags = [] + if wq.wq_lflags & 0x1: + wqflags.append("B") + if wq.wq_lflags & 0x2: + wqflags.append("W") + if wq.wq_lflags & 0x4: + wqflags.append("C") + if wq.wq_lflags & 0x8: + wqflags.append("L") + + return format.format(procstr, wqstr, wq.wq_threads_scheduled, wq.wq_reqcount, wq.wq_thidlecount, "".join(flags), "".join(wqflags)) + +@header("{0: <24s} {1: <5s} {2: <5s} {3: <5s} {4: <5s} {5: <5s} {6: <5s} {7: <5s}".format('category', 'uint', 'uinit', 'lgcy', 'util', 'bckgd', 'maint', 'event')) +def GetPthreadWorkqueueDetail(wq): + format = " {0: <22s} {1: <5d} {2: <5d} {3: <5d} {4: <5d} {5: <5d} {6: <5d} {7: <5d}" + # requests + schedstr = format.format('scheduled', wq.wq_thscheduled_count[0], wq.wq_thscheduled_count[1], wq.wq_thscheduled_count[2], wq.wq_thscheduled_count[3], wq.wq_thscheduled_count[4], wq.wq_thscheduled_count[5], wq.wq_thscheduled_count[6]) + activestr = format.format('active', wq.wq_thactive_count[0], wq.wq_thactive_count[1], wq.wq_thactive_count[2], wq.wq_thactive_count[3], wq.wq_thactive_count[4], wq.wq_thactive_count[5], wq.wq_thactive_count[6]) + return "\n".join([schedstr, activestr]) + +@lldb_command('showthreadpsynch') +def PthreadCurrentMutex(cmd_args=None): + """ + display information about a thread's pthread state + Syntax: (lldb) showthreadpsync + """ + if not cmd_args: + raise ArgumentError("No arguments passed") + + thread = kern.GetValueFromAddress(cmd_args[0], "thread_t") + print GetPthreadSummary.header + print GetPthreadSummary(thread) + + uthread = Cast(thread.uthread, "uthread_t") + kwe = Cast(addressof(uthread.uu_save.uus_kwe), 'struct ksyn_waitq_element *') + if not kwe or not kwe.kwe_kwqqueue: + print GetKweSummary.header + print GetKweSummary(kwe) + else: + print GetKwqSummary.header + print GetKwqSummary(kwe.kwe_kwqqueue) + +@lldb_command('showpthreadkwq') +def PthreadShowKsynQueue(cmd_args=None): + """ + display information about a pthread ksyn_wait_queue_t + Syntax: (lldb) showpthreadkwq + """ + if not cmd_args: + raise ArgumentError("No arguments passed") + + kwq = kern.GetValueFromAddress(cmd_args[0], "ksyn_wait_queue_t") + print GetKwqSummary.header + print GetKwqSummary(kwq) + +@lldb_command('showpthreadkwe') +def PthreadShowKsynElement(cmd_args=None): + """ + display information about a thread's ksyn_waitq_element + Syntax: (lldb) showpthreadkwe + """ + if not cmd_args: + raise ArgumentError("No arguments passed") + + kwe = kern.GetValueFromAddress(cmd_args[0], "struct ksyn_waitq_element *") + print GetKweSummary.header + print GetKweSummary(kwe) + +@lldb_command('showpthreadworkqueue') +def ShowPthreadWorkqueue(cmd_args=None): + """ + display information about a processes' pthread workqueue + Syntax: (lldb) showpthreadworkqueue + """ + + if not cmd_args: + raise ArgumentError("No arguments passed") + + proc = 
kern.GetValueFromAddress(cmd_args[0], "proc_t") + wq = Cast(proc.p_wqptr, "struct workqueue *"); + + print GetPthreadWorkqueueSummary.header + print GetPthreadWorkqueueSummary(wq) + + print GetPthreadWorkqueueDetail.header + print GetPthreadWorkqueueDetail(wq) + +def IterateTAILQ_HEAD(headval, element_name): + """ iterate over a TAILQ_HEAD in kernel. refer to bsd/sys/queue.h + params: + headval - value : value object representing the head of the list + element_name- str : string name of the field which holds the list links. + returns: + A generator does not return. It is used for iterating. + value : an object that is of type as headval->tqh_first. Always a pointer object + example usage: + list_head = kern.GetGlobalVariable('mountlist') + for entryobj in IterateTAILQ_HEAD(list_head, 'mnt_list'): + print GetEntrySummary(entryobj) + """ + iter_val = headval.tqh_first + while unsigned(iter_val) != 0 : + yield iter_val + iter_val = iter_val.__getattr__(element_name).tqe_next + #end of yield loop + +def __lldb_init_module(debugger, internal_dict): + pass diff --git a/lldbmacros/pthread.py b/lldbmacros/pthread.py deleted file mode 100644 index a24779c..0000000 --- a/lldbmacros/pthread.py +++ /dev/null @@ -1,152 +0,0 @@ -from xnu import * -import struct - -@header("{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}".format('sig', 'tid', 'options', 'lseq', 'useq')) -def GetUserMutexSummary(task, uaddr): - if int(task.t_flags) & 0x1: - mtxlayout = "QIIhhIQIII" - padoffset = 1 - else: - mtxlayout = "QIIhhQIII" - padoffset = 0 - - data = GetUserDataAsString(task, uaddr, struct.calcsize(mtxlayout)) - info = struct.unpack(mtxlayout, data) - - format = "{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}" - sigstr = str("{0: <#020x}".format(info[0])) - - # the options field dictates whether we were created misaligned - if info[2] & 0x800: - lseq = info[7+padoffset] - useq = info[8+padoffset] - else: - lseq = info[6+padoffset] - useq = info[7+padoffset] - - return format.format(sigstr, hex(info[5+padoffset]), hex(info[2]), hex(lseq), hex(useq)) - -@lldb_command('showusermutex') -def PthreadShowUserMutex(cmd_args=None): - """ - display information about a userspace mutex at a given address - Syntax: (lldb) showusermutex - """ - if not cmd_args: - raise ArgumentError("No arguments passed") - task = kern.GetValueFromAddress(cmd_args[0], "task_t") - uaddr = kern.GetValueFromAddress(cmd_args[1], "user_addr_t") - - print GetUserMutexSummary.header - print GetUserMutexSummary(task, uaddr) - -@lldb_type_summary(['ksyn_waitq_element *', 'ksyn_waitq_element_t']) -@header("{0: <24s} {1: <24s} {2: <24s} {3: <10s}".format('kwe', 'kwq', 'uaddr', 'type')) -def GetKweSummary(kwe): - format = "{0: <24s} {1: <24s} {2: <24s} {3: <10s}" - kwe = Cast(addressof(kwe), "ksyn_waitq_element_t") - kwestr = str("{0: <#020x}".format(kwe)) - - kwq = Cast(kwe.kwe_kwqqueue, "ksyn_wait_queue_t") - kwqstr = str("{0: <#020x}".format(kwq)) - uaddrstr = str("{0: <#020x}".format(kwq.kw_addr)) - - kwqtype = "" - if kwq.kw_type & 0xff == 0x01: - kwqtype = "mtx" - if kwq.kw_type & 0xff == 0x02: - kwqtype = "cvar" - if kwq.kw_type & 0xff == 0x04: - kwqtype = "rwlock" - if kwq.kw_type & 0xff == 0x05: - kwqtype = "sema" - - return format.format(kwestr, kwqstr, uaddrstr, kwqtype) - -@header("{0: <24s} {1: <24s} {2: <24s}".format('thread', 'thread_id', 'uthread')) -def GetPthreadSummary(thread): - format = "{0: <24s} {1: <24s} {2: <24s}" - - threadstr = str("{0: <#020x}".format(thread)) - if int(thread.static_param): - threadstr += "[WQ]" - - uthread 
= Cast(thread.uthread, "uthread_t") - uthreadstr = str("{0: <#020x}".format(uthread)) - - - return format.format(threadstr, hex(thread.thread_id), uthreadstr) - -@header("{0: <24s} {1: <24s} {2: <10s} {3: <10s} {4: <10s} {5: <10s} {6: <10s}".format('proc', 'wq', 'sched', 'req', 'idle', 'wq_flags', 'wq_lflags')) -def GetPthreadWorkqueueSummary(wq): - format = "{0: <24s} {1: <24s} {2: <10d} {3: <10d} {4: <10d} {5: <10s} {6: <10s}" - procstr = str("{0: <#020x}".format(wq.wq_proc)) - wqstr = str("{0: <#020x}".format(wq)) - - flags = [] - if wq.wq_flags & 0x1: - flags.append("I") - if wq.wq_flags & 0x2: - flags.append("R") - if wq.wq_flags & 0x4: - flags.append("E") - - wqflags = [] - if wq.wq_lflags & 0x1: - wqflags.append("B") - if wq.wq_lflags & 0x2: - wqflags.append("W") - if wq.wq_lflags & 0x4: - wqflags.append("C") - if wq.wq_lflags & 0x8: - wqflags.append("L") - - return format.format(procstr, wqstr, wq.wq_threads_scheduled, wq.wq_reqcount, wq.wq_thidlecount, "".join(flags), "".join(wqflags)) - -@header("{0: <24s} {1: <5s} {2: <5s} {3: <5s} {4: <5s} {5: <5s} {6: <5s} {7: <5s}".format('category', 'uint', 'uinit', 'lgcy', 'util', 'bckgd', 'maint', 'event')) -def GetPthreadWorkqueueDetail(wq): - format = " {0: <22s} {1: <5d} {2: <5d} {3: <5d} {4: <5d} {5: <5d} {6: <5d} {7: <5d}" - # requests - schedstr = format.format('scheduled', wq.wq_thscheduled_count[0], wq.wq_thscheduled_count[1], wq.wq_thscheduled_count[2], wq.wq_thscheduled_count[3], wq.wq_thscheduled_count[4], wq.wq_thscheduled_count[5], wq.wq_thscheduled_count[6]) - activestr = format.format('active', wq.wq_thactive_count[0], wq.wq_thactive_count[1], wq.wq_thactive_count[2], wq.wq_thactive_count[3], wq.wq_thactive_count[4], wq.wq_thactive_count[5], wq.wq_thactive_count[6]) - return "\n".join([schedstr, activestr]) - -@lldb_command('showpthreadstate') -def PthreadCurrentMutex(cmd_args=None): - """ - display information about a thread's pthread state - Syntax: (lldb) showpthreadstate - """ - if not cmd_args: - raise ArgumentError("No arguments passed") - - thread = kern.GetValueFromAddress(cmd_args[0], "thread_t") - print GetPthreadSummary.header - print GetPthreadSummary(thread) - - uthread = Cast(thread.uthread, "uthread_t") - kwe = addressof(uthread.uu_kevent.uu_kwe) - print GetKweSummary.header - print GetKweSummary(kwe) - -@lldb_command('showpthreadworkqueue') -def ShowPthreadWorkqueue(cmd_args=None): - """ - display information about a processes' pthread workqueue - Syntax: (lldb) showpthreadworkqueue - """ - - if not cmd_args: - raise ArgumentError("No arguments passed") - - proc = kern.GetValueFromAddress(cmd_args[0], "proc_t") - wq = Cast(proc.p_wqptr, "struct workqueue *"); - - print GetPthreadWorkqueueSummary.header - print GetPthreadWorkqueueSummary(wq) - - print GetPthreadWorkqueueDetail.header - print GetPthreadWorkqueueDetail(wq) - -def __lldb_init_module(debugger, internal_dict): - pass diff --git a/man/pthread_mutexattr.3 b/man/pthread_mutexattr.3 index 13e0861..756c407 100644 --- a/man/pthread_mutexattr.3 +++ b/man/pthread_mutexattr.3 @@ -81,6 +81,10 @@ .Fn pthread_mutexattr_settype "pthread_mutexattr_t *attr" "int type" .Ft int .Fn pthread_mutexattr_gettype "pthread_mutexattr_t *attr" "int *type" +.Ft int +.Fn pthread_mutexattr_setpolicy_np "pthread_mutexattr_t *attr" "int policy" +.Ft int +.Fn pthread_mutexattr_getpolicy_np "pthread_mutexattr_t *attr" "int *policy" .Sh DESCRIPTION Mutex attributes are used to specify parameters to .Fn pthread_mutex_init . 
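For illustration, here is a minimal sketch (not part of the patch; the helper name is made up) of opting a mutex into the new first-fit policy declared in the SYNOPSIS above, using the PTHREAD_MUTEX_POLICY_*_NP constants defined by the pthread.h hunk later in this patch. Per the BACKWARDS COMPATIBILITY section below, the set call fails with EINVAL on older releases and the mutex simply stays fairshare, so the sketch tolerates that error:

#include <errno.h>
#include <pthread.h>

/* Illustrative helper, not part of the patch. */
int
init_firstfit_mutex(pthread_mutex_t *mutex)
{
    pthread_mutexattr_t attr;
    int err;

    err = pthread_mutexattr_init(&attr);
    if (err != 0) {
        return err;
    }

    /* Ask for first-fit; EINVAL means this OS only offers fairshare. */
    err = pthread_mutexattr_setpolicy_np(&attr, PTHREAD_MUTEX_POLICY_FIRSTFIT_NP);
    if (err != 0 && err != EINVAL) {
        (void)pthread_mutexattr_destroy(&attr);
        return err;
    }

    err = pthread_mutex_init(mutex, &attr);
    (void)pthread_mutexattr_destroy(&attr);
    return err;
}

pthread_mutexattr_getpolicy_np() can be used to read the policy back, and the ENVIRONMENT section below documents PTHREAD_MUTEX_DEFAULT_POLICY (1 for fairshare, 3 for first-fit) as a process-wide default for mutexes that never set a policy explicitly.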
@@ -164,6 +168,31 @@ This is the default mutex type for functions copy the type value of the attribute to the location pointed to by the second parameter. .Pp The +.Fn pthread_mutexattr_setpolicy_np +function sets the mutex +.Fa policy +value of the attribute. +Valid mutex policies are: +.Bl -tag -width "XXX" -offset 2n +.It Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP +The first-fit mutex policy allows acquisition of the mutex to occur in any +order. This policy is similar in operation to os_unfair_lock, new contending +acquirers may obtain ownership of the mutex ahead of existing waiters. +.It Dv PTHREAD_MUTEX_POLICY_FAIRSHARE_NP +The fairshare mutex policy guarantees that ownership of a contended mutex will +be granted to waiters on a strictly ordered first-in, first-out basis. That is, +a mutex holder that unlocks the mutex and then attempts to relock will wait +behind existing threads already waiting on the mutex before being granted +ownership again. +.El +.Pp +The +.Fn pthread_mutexattr_getpolicy_np +function copies the mutex +.Fa policy +value of the attribute to the location pointed to by the second parameter. +.Pp +The .Fn pthread_mutexattr_set* functions set the attribute that corresponds to each function name. .Pp @@ -174,6 +203,39 @@ to the location pointed to by the second function parameter. .Sh RETURN VALUES If successful, these functions return 0. Otherwise, an error number is returned to indicate the error. +.Sh ENVIRONMENT +The following environment variables change the behavior of the pthread mutex +implementation. +.Bl -tag -width "XXX" -offset 2n +.It Ev PTHREAD_MUTEX_DEFAULT_POLICY +Controls the process-wide policy used when initializing a pthread_mutex_t that +has not had a policy set via +.Fn pthread_mutexattr_setpolicy_np . +The valid values are mapped as: +.Pp +.Bl -tag -width "XXX" +.It Fa 1 +.Dv PTHREAD_MUTEX_POLICY_FAIRSHARE_NP +.It Fa 3 +.Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP +.El +.El +.Sh BACKWARDS COMPATIBILITY +Prior to macOS 10.14 (iOS and tvOS 12.0, watchOS 5.0) the only available +pthread mutex policy mode was +.Dv PTHREAD_MUTEX_POLICY_FAIRSHARE_NP . +macOS 10.14 (iOS and tvOS 12.0, watchOS 5.0) introduces +.Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP +and also makes this the default mode for mutexes initialized without a policy +attribute set. +.Pp +Attempting to use +.Fn pthread_mutexattr_setpolicy_np +to set the policy of a pthread_mutex_t to +.Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP +on earlier releases will fail with +.Er EINVAL +and the mutex will continue to operate in fairshare mode. .Sh ERRORS The .Fn pthread_mutexattr_init @@ -252,6 +314,27 @@ function will fail if: Invalid value for .Fa attr . .El +.Pp +The +.Fn pthread_mutexattr_setpolicy_np +function will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +Invalid value for +.Fa attr . +.El +.Pp +The +.Fn pthread_mutexattr_getpolicy_np +function will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +The value specified either by +.Fa type +or +.Fa attr +is invalid. +.El .Sh SEE ALSO .Xr pthread_mutex_init 3 .Sh STANDARDS diff --git a/private/dependency_private.h b/private/dependency_private.h new file mode 100644 index 0000000..77d209f --- /dev/null +++ b/private/dependency_private.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2018 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). 
You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef __PTHREAD_DEPENDENCY_PRIVATE__ +#define __PTHREAD_DEPENDENCY_PRIVATE__ + +#include +#include +#include +#include + +__BEGIN_DECLS + +OS_ASSUME_NONNULL_BEGIN + +/*! + * @typedef pthread_dependency_t + * + * @abstract + * A pthread dependency is a one-time dependency between a thread producing + * a value and a waiter thread, expressed to the system in a way + * that priority inversion avoidance can be applied if necessary. + * + * @discussion + * These tokens are one-time use, and meant to be on the stack of the waiter + * thread. + * + * These tokens must be both fulfilled and waited on, exactly one of each. + */ +typedef struct pthread_dependency_s { + uint32_t __pdep_owner; + uint32_t __pdep_opaque1; + uint64_t __pdep_opaque2; +} pthread_dependency_t; + +/*! + * @typedef pthread_dependency_attr_t + * + * @abstract + * An opaque type to allow for future expansion of the pthread_dependency + * interface. + */ +typedef struct pthread_dependency_attr_s pthread_dependency_attr_t; + +#if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)) || defined(_DARWIN_C_SOURCE) || defined(__cplusplus) +/*! + * @macro PTHREAD_DEPENDENCY_INITIALIZER_NP + * + * @abstract + * Initialize a one-time dependency token. + * + * @param __pthread + * The thread that will be waited on for this dependency to be fulfilled. + * It is expected that this thread will call pthread_dependency_fulfill_np(). + */ +#define PTHREAD_DEPENDENCY_INITIALIZER_NP(__pthread) \ + { pthread_mach_thread_np(__pthread), 0, 0 } +#endif + +/*! + * @function pthread_dependency_init_np + * + * @abstract + * Initialize a dependency token. + * + * @param __dependency + * A pointer to a dependency token to initialize. + * + * @param __pthread + * The thread that will be waited on for this dependency to be fulfilled. + * It is expected that this thread will call pthread_dependency_fulfill_np(). + * + * @param __attrs + * This argument is reserved for future expansion purposes, and NULL should be + * passed. + */ +__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) +OS_NONNULL1 OS_NONNULL2 OS_NOTHROW +void pthread_dependency_init_np(pthread_dependency_t *__dependency, + pthread_t __pthread, pthread_dependency_attr_t *_Nullable __attrs); + +/*! + * @function pthread_dependency_fulfill_np + * + * @abstract + * Fulfill a dependency. + * + * @discussion + * Calling pthread_dependency_fulfill_np() with a token that hasn't been + * initialized yet, or calling pthread_dependency_fulfill_np() on the same + * dependency token more than once is undefined and will cause the process + * to be terminated. + * + * The thread that calls pthread_dependency_fulfill_np() must be the same + * as the pthread_t that was specified when initializing the token. Not doing so + * is undefined and will cause the process to be terminated. 
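As a usage illustration of the one-shot dependency token described in this header (not part of the patch; the <pthread/dependency_private.h> include path and the request structure are assumptions), the main thread below is named as the fulfilling thread, a worker waits on the token, and the pointer passed to pthread_dependency_fulfill_np() comes back from pthread_dependency_wait_np(). The token is kept in a shared structure rather than on the waiter's stack only to keep the sketch short:

#include <pthread.h>
#include <pthread/dependency_private.h>    /* assumed install path of this header */
#include <stdio.h>

struct request {
    pthread_dependency_t dep;    /* one-time token: exactly one fulfill, one wait */
    int input;
};

static void *
waiter(void *arg)
{
    struct request *req = arg;
    /* Blocks until the owning thread fulfills; returns the fulfilled value. */
    long result = (long)pthread_dependency_wait_np(&req->dep);
    printf("result: %ld\n", result);
    return NULL;
}

int
main(void)
{
    struct request req = { .input = 21 };
    pthread_t t;

    /*
     * Name the thread expected to call pthread_dependency_fulfill_np()
     * (here, the main thread) so the kernel can boost it if the waiter
     * would otherwise sit behind a priority inversion.
     */
    pthread_dependency_init_np(&req.dep, pthread_self(), NULL);
    pthread_create(&t, NULL, waiter, &req);

    pthread_dependency_fulfill_np(&req.dep, (void *)(long)(req.input * 2));
    pthread_join(t, NULL);
    return 0;
}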
+ * + * @param __dependency + * A pointer to a dependency token that was previously initialized. + * + * @param __value + * An optional value that can be returned through the dependency token + * to the waiter. + */ +__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) +OS_NONNULL1 OS_NOTHROW +void pthread_dependency_fulfill_np(pthread_dependency_t *__dependency, + void * _Nullable __value); + +/*! + * @function pthread_dependency_wait_np + * + * @abstract + * Wait on a dependency. + * + * @discussion + * Calling pthread_dependency_wait_np() with a token that hasn't been + * initialized yet, or calling pthread_dependency_wait_np() on the same + * dependency token more than once is undefined and will cause the process + * to be terminated. + * + * If the dependency is not fulfilled yet when this function is called, priority + * inversion avoidance will be applied to the thread that was specified when + * initializing the token, to ensure that it can call + * pthread_dependency_fulfill_np() without causing a priority inversion for the + * thread calling pthread_dependency_wait_np(). + * + * @param __dependency + * A pointer to a dependency token that was previously initialized with + * PTHREAD_DEPENDENCY_INITIALIZER_NP() or pthread_dependency_init_np(). + * + * @returns + * The value that was passed to pthread_dependency_fulfill_np() as the `__value` + * argument. + */ +__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) +OS_NONNULL1 OS_NOTHROW +void *_Nullable pthread_dependency_wait_np(pthread_dependency_t *__dependency); + +OS_ASSUME_NONNULL_END + +__END_DECLS + +#endif //__PTHREAD_DEPENDENCY_PRIVATE__ diff --git a/private/private.h b/private/private.h index b98a350..b321442 100644 --- a/private/private.h +++ b/private/private.h @@ -93,6 +93,8 @@ int pthread_chdir_np(char *path); __API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0)) int pthread_fchdir_np(int fd); +__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) +int pthread_attr_setcpupercent_np(pthread_attr_t * __restrict, int, unsigned long); #ifdef _os_tsd_get_base @@ -107,17 +109,17 @@ __header_always_inline uint64_t _pthread_threadid_self_np_direct(void) { #ifndef __i386__ - if (_pthread_has_direct_tsd()) { + if (_pthread_has_direct_tsd()) { #ifdef OS_GS_RELATIVE - return *(uint64_t OS_GS_RELATIVE *)(_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET); + return *(uint64_t OS_GS_RELATIVE *)(_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET); #else - return *(uint64_t*)((char *)_os_tsd_get_base() + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET); + return *(uint64_t*)((char *)_os_tsd_get_base() + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET); #endif - } + } #endif - uint64_t threadid = 0; - pthread_threadid_np(NULL, &threadid); - return threadid; + uint64_t threadid = 0; + pthread_threadid_np(NULL, &threadid); + return threadid; } #endif // _os_tsd_get_base diff --git a/private/qos_private.h b/private/qos_private.h index 50f273a..6068a82 100644 --- a/private/qos_private.h +++ b/private/qos_private.h @@ -25,6 +25,7 @@ #define _QOS_PRIVATE_H #include +#include #include /* qos_class_t */ #include @@ -33,48 +34,6 @@ #include #endif -// pthread_priority_t is an on opaque integer that is guaranteed to be ordered such that -// combations of QoS classes and relative priorities are ordered numerically, according to -// their combined priority. 
-typedef unsigned long pthread_priority_t; - -// masks for splitting the handling the contents of a pthread_priority_t, the mapping from -// qos_class_t to the class bits, however, is intentionally not exposed. -#define _PTHREAD_PRIORITY_FLAGS_MASK 0xff000000 -#define _PTHREAD_PRIORITY_FLAGS_SHIFT (24ull) -#define _PTHREAD_PRIORITY_ENCODING_MASK 0x00a00000 -#define _PTHREAD_PRIORITY_ENCODING_SHIFT (22ull) -#define _PTHREAD_PRIORITY_ENCODING_V0 0x00000000 -#define _PTHREAD_PRIORITY_ENCODING_V1 0x00400000 /* unused */ -#define _PTHREAD_PRIORITY_ENCODING_V2 0x00800000 /* unused */ -#define _PTHREAD_PRIORITY_ENCODING_V3 0x00a00000 /* unused */ -#define _PTHREAD_PRIORITY_QOS_CLASS_MASK 0x003fff00 -#define _PTHREAD_PRIORITY_QOS_CLASS_SHIFT (8ull) -#define _PTHREAD_PRIORITY_PRIORITY_MASK 0x000000ff -#define _PTHREAD_PRIORITY_PRIORITY_SHIFT (0) - -#define _PTHREAD_PRIORITY_OVERCOMMIT_FLAG 0x80000000 -#define _PTHREAD_PRIORITY_INHERIT_FLAG 0x40000000 -#define _PTHREAD_PRIORITY_ROOTQUEUE_FLAG 0x20000000 -// Used to indicate to the pthread kext that the provided event manager thread -// priority is actually a scheduling priority not a QoS. We can have ROOTQUEUE_FLAG -// perform double duty because it's never provided to the kernel. -#define _PTHREAD_PRIORITY_SCHED_PRI_FLAG 0x20000000 -#define _PTHREAD_PRIORITY_SCHED_PRI_MASK 0x0000ffff -#define _PTHREAD_PRIORITY_ENFORCE_FLAG 0x10000000 -#define _PTHREAD_PRIORITY_OVERRIDE_FLAG 0x08000000 - -// libdispatch defines the following, so it's not safe to use for anything we -// expect to be passed in from userspace -#define _PTHREAD_PRIORITY_DEFAULTQUEUE_FLAG 0x04000000 - -// The event manager flag indicates that this thread/request is for a event -// manager thread. There can only ever be one event manager thread at a time and -// it is brought up at the highest of all event manager priorities passed to the -// kext. -#define _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG 0x02000000 -#define _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG 0x01000000 - // redeffed here to avoid leaving __QOS_ENUM defined in the public header #define __QOS_ENUM(name, type, ...) 
enum { __VA_ARGS__ }; typedef type name##_t #define __QOS_AVAILABLE_10_10 diff --git a/private/tsd_private.h b/private/tsd_private.h index f91c1f6..f9260fb 100644 --- a/private/tsd_private.h +++ b/private/tsd_private.h @@ -68,6 +68,10 @@ #define __TSD_RETURN_TO_KERNEL 5 #endif +#ifndef __TSD_PTR_MUNGE +#define __TSD_PTR_MUNGE 7 +#endif + #ifndef __TSD_MACH_SPECIAL_REPLY #define __TSD_MACH_SPECIAL_REPLY 8 #endif @@ -81,6 +85,7 @@ #define _PTHREAD_TSD_SLOT_MACH_THREAD_SELF __TSD_MACH_THREAD_SELF #define _PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS __TSD_THREAD_QOS_CLASS #define _PTHREAD_TSD_SLOT_RETURN_TO_KERNEL __TSD_RETURN_TO_KERNEL +#define _PTHREAD_TSD_SLOT_PTR_MUNGE __TSD_PTR_MUNGE #define _PTHREAD_TSD_SLOT_MACH_SPECIAL_REPLY __TSD_MACH_SPECIAL_REPLY //#define _PTHREAD_TSD_SLOT_SEMAPHORE_CACHE __TSD_SEMAPHORE_CACHE diff --git a/private/workqueue_private.h b/private/workqueue_private.h index 0b0a001..9cd0e95 100644 --- a/private/workqueue_private.h +++ b/private/workqueue_private.h @@ -179,6 +179,14 @@ __API_AVAILABLE(macos(10.10.2)) int _pthread_workqueue_asynchronous_override_reset_all_self(void); +__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) +int +_pthread_workloop_create(uint64_t workloop_id, uint64_t options, pthread_attr_t *attr); + +__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) +int +_pthread_workloop_destroy(uint64_t workloop_id); + __END_DECLS #endif // __PTHREAD_WORKQUEUE_H__ diff --git a/pthread/introspection.h b/pthread/introspection.h index 1829208..10b719a 100644 --- a/pthread/introspection.h +++ b/pthread/introspection.h @@ -64,18 +64,40 @@ typedef void (*pthread_introspection_hook_t)(unsigned int event, /*! * @enum pthread_introspection_event_t + * Events sent by libpthread about threads lifetimes. * - * @constant PTHREAD_INTROSPECTION_THREAD_CREATE - * pthread_t was created. + * @const PTHREAD_INTROSPECTION_THREAD_CREATE + * The specified pthread_t was created, and there will be a paired + * PTHREAD_INTROSPECTION_THREAD_DESTROY event. However, there may not be + * a START/TERMINATE pair of events for this pthread_t. * - * @constant PTHREAD_INTROSPECTION_THREAD_START - * Thread has started and stack was allocated. + * Starting with macOS 10.14, and iOS 12, this event is always sent before + * PTHREAD_INTROSPECTION_THREAD_START is sent. This event is however not sent + * for the main thread. * - * @constant PTHREAD_INTROSPECTION_THREAD_TERMINATE - * Thread is about to be terminated and stack will be deallocated. + * This event may not be sent from the context of the passed in pthread_t. * - * @constant PTHREAD_INTROSPECTION_THREAD_DESTROY - * pthread_t is about to be destroyed. + * Note that all properties of this thread may not be functional yet, and it is + * not permitted to call functions on this thread past observing its address. + * + * @const PTHREAD_INTROSPECTION_THREAD_START + * Thread has started and its stack was allocated. There will be a matching + * PTHREAD_INTROSPECTION_THREAD_TERMINATE event. + * + * This event is always sent from the context of the passed in pthread_t. + * + * @const PTHREAD_INTROSPECTION_THREAD_TERMINATE + * Thread is about to be terminated and stack will be deallocated. This always + * matches a PTHREAD_INTROSPECTION_THREAD_START event. + * + * This event is always sent from the context of the passed in pthread_t. + * + * @const PTHREAD_INTROSPECTION_THREAD_DESTROY + * pthread_t is about to be destroyed. 
This always matches + * a PTHREAD_INTROSPECTION_THREAD_CREATE event, but there may not have been + * a START/TERMINATE pair of events for this pthread_t. + * + * This event may not be sent from the context of the passed in pthread_t. */ enum { PTHREAD_INTROSPECTION_THREAD_CREATE = 1, diff --git a/pthread/pthread.h b/pthread/pthread.h index 0e2ecb7..f5fdff6 100644 --- a/pthread/pthread.h +++ b/pthread/pthread.h @@ -171,6 +171,12 @@ __BEGIN_DECLS #define PTHREAD_MUTEX_RECURSIVE 2 #define PTHREAD_MUTEX_DEFAULT PTHREAD_MUTEX_NORMAL +/* + * Mutex policy attributes + */ +#define PTHREAD_MUTEX_POLICY_FAIRSHARE_NP 1 +#define PTHREAD_MUTEX_POLICY_FIRSTFIT_NP 3 + /* * RWLock variables */ @@ -405,6 +411,10 @@ __API_AVAILABLE(macos(10.4), ios(2.0)) int pthread_mutexattr_gettype(const pthread_mutexattr_t * __restrict, int * __restrict); +__API_AVAILABLE(macos(10.13.4), ios(11.3), watchos(4.3), tvos(11.3)) +int pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t * __restrict, + int * __restrict); + __API_AVAILABLE(macos(10.4), ios(2.0)) int pthread_mutexattr_init(pthread_mutexattr_t *); @@ -420,6 +430,9 @@ int pthread_mutexattr_setpshared(pthread_mutexattr_t *, int); __API_AVAILABLE(macos(10.4), ios(2.0)) int pthread_mutexattr_settype(pthread_mutexattr_t *, int); +__API_AVAILABLE(macos(10.7), ios(5.0)) +int pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *, int); + __SWIFT_UNAVAILABLE_MSG("Use lazily initialized globals instead") __API_AVAILABLE(macos(10.4), ios(2.0)) int pthread_once(pthread_once_t *, void (* _Nonnull)(void)); diff --git a/pthread/pthread_spis.h b/pthread/pthread_spis.h index a0ba754..91fb641 100644 --- a/pthread/pthread_spis.h +++ b/pthread/pthread_spis.h @@ -63,19 +63,13 @@ __BEGIN_DECLS #if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)) || defined(_DARWIN_C_SOURCE) /* firstfit */ #define PTHREAD_FIRSTFIT_MUTEX_INITIALIZER {_PTHREAD_FIRSTFIT_MUTEX_SIG_init, {0}} + /* * Mutex attributes */ -#define _PTHREAD_MUTEX_POLICY_NONE 0 -#define _PTHREAD_MUTEX_POLICY_FAIRSHARE 1 -#define _PTHREAD_MUTEX_POLICY_FIRSTFIT 2 - -/* manipulate the mutex policy attributes */ -__API_AVAILABLE(macos(10.7), ios(5.0)) -int pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *, int ); - -__API_AVAILABLE(macos(10.13.4), ios(11.3)) -int pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t *, int * ); +#define _PTHREAD_MUTEX_POLICY_NONE PTHREAD_MUTEX_POLICY_NONE +#define _PTHREAD_MUTEX_POLICY_FAIRSHARE PTHREAD_MUTEX_POLICY_FAIRSHARE_NP +#define _PTHREAD_MUTEX_POLICY_FIRSTFIT PTHREAD_MUTEX_POLICY_FIRSTFIT_NP #endif /* (!_POSIX_C_SOURCE && !_XOPEN_SOURCE) || _DARWIN_C_SOURCE */ diff --git a/pthread/stack_np.h b/pthread/stack_np.h new file mode 100644 index 0000000..9b5f513 --- /dev/null +++ b/pthread/stack_np.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2018 Apple Inc. All rights reserved. + * + * @APPLE_APACHE_LICENSE_HEADER_START@ + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * @APPLE_APACHE_LICENSE_HEADER_END@ + */ + +#ifndef __PTHREAD_STACK_NP__ +#define __PTHREAD_STACK_NP__ + +#include +#include +#include +#include +#include +#include + +OS_ASSUME_NONNULL_BEGIN + +/*! @header + * Low-level API to introspect thread stacks. + */ + +__BEGIN_DECLS + +/*! + * @function pthread_stack_frame_decode_np + * + * @abstract + * Decodes the return address and the next stack frame address + * from the given stack frame address. + * + * @discussion + * Validation of the frame address is not performed by this function. + * The caller is responsible for making sure the frame address is valid, + * for example using pthread_get_stackaddr_np() and pthread_get_stacksize_np(). + * + * @param frame_addr + * A valid stack frame address such as __builtin_frame_address(0) or the return + * value of a previous call to pthread_stack_frame_decode_np(). + * + * @param return_addr + * An optional out paramter that will be filled with the return address stored + * at the specified stack frame. + * + * @returns + * This returns the next frame address stored at the specified stack frame. + */ +__OSX_AVAILABLE(10.14) __IOS_AVAILABLE(12.0) +__TVOS_AVAILABLE(12.0) __WATCHOS_AVAILABLE(5.0) +uintptr_t +pthread_stack_frame_decode_np(uintptr_t frame_addr, + uintptr_t *_Nullable return_addr); + +__END_DECLS + +OS_ASSUME_NONNULL_END + +#endif // __PTHREAD_STACK_NP__ diff --git a/src/internal.h b/src/internal.h index 9f2e127..c9c16c7 100644 --- a/src/internal.h +++ b/src/internal.h @@ -70,6 +70,8 @@ typedef struct _pthread_attr_t pthread_attr_t; #include #include #include +#include +#include #define __OS_EXPOSE_INTERNALS__ 1 #include @@ -125,19 +127,24 @@ typedef os_unfair_lock _pthread_lock; #define _PTHREAD_UNLOCK(lock) os_unfair_lock_unlock_inline(&(lock)) #define _PTHREAD_UNLOCK_FROM_MACH_THREAD(lock) os_unfair_lock_unlock_inline_no_tsd_4libpthread(&(lock)) +#define _PTHREAD_POLICY_IS_FIXEDPRI(x) ((x) == SCHED_RR || (x) == SCHED_FIFO) + +extern int __is_threaded; +extern int __unix_conforming; + // List of all pthreads in the process. TAILQ_HEAD(__pthread_list, _pthread); -extern struct __pthread_list __pthread_head; +PTHREAD_NOEXPORT extern struct __pthread_list __pthread_head; // Lock protects access to above list. -extern _pthread_lock _pthread_list_lock; +PTHREAD_NOEXPORT extern _pthread_lock _pthread_list_lock; -extern int __is_threaded; +PTHREAD_NOEXPORT extern uint32_t _main_qos; #if PTHREAD_DEBUG_LOG #include -extern int _pthread_debuglog; -extern uint64_t _pthread_debugstart; +PTHREAD_NOEXPORT extern int _pthread_debuglog; +PTHREAD_NOEXPORT extern uint64_t _pthread_debugstart; #endif /* @@ -153,6 +160,8 @@ extern uint64_t _pthread_debugstart; #define _INTERNAL_POSIX_THREAD_KEYS_END 768 #endif +#define PTHREAD_T_OFFSET 0 + #define MAXTHREADNAMESIZE 64 #define _PTHREAD_T typedef struct _pthread { @@ -165,52 +174,56 @@ typedef struct _pthread { // // SPI - These fields are private. 
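A minimal usage sketch for the pthread_stack_frame_decode_np() SPI declared above in pthread/stack_np.h (the dump_backtrace helper is invented for the example; the bounds check against pthread_get_stackaddr_np()/pthread_get_stacksize_np() is the caller-side validation the header asks for, since the function itself performs none):

#include <pthread.h>
#include <pthread/stack_np.h>
#include <stdint.h>
#include <stdio.h>

static void
dump_backtrace(void)
{
	pthread_t self = pthread_self();
	uintptr_t top = (uintptr_t)pthread_get_stackaddr_np(self);
	uintptr_t bottom = top - pthread_get_stacksize_np(self);
	uintptr_t frame = (uintptr_t)__builtin_frame_address(0);

	while (frame > bottom && frame < top) {
		uintptr_t ret = 0;
		uintptr_t next = pthread_stack_frame_decode_np(frame, &ret);
		printf("frame %#lx return %#lx\n", (unsigned long)frame, (unsigned long)ret);
		if (next <= frame) {
			break;	// the frame chain should only move toward the stack top
		}
		frame = next;
	}
}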
// - // these fields are globally protected by _pthread_list_lock: - uint32_t childrun:1, - parentcheck:1, - childexit:1, - pad3:29; - - _pthread_lock lock; // protect access to everything below - uint32_t detached:8, - inherit:8, - policy:8, - kernalloc:1, - schedset:1, - wqthread:1, - wqkillset:1, - pad:4; - -#if defined(__LP64__) - uint32_t pad0; -#endif - - void *(*fun)(void*); // thread start routine - void *arg; // thread start routine argument - void *exit_value; // thread exit value storage - - semaphore_t joiner_notify; // pthread_join notification - - int max_tsd_key; - int cancel_state; // whether the thread can be cancelled - int cancel_error; - int err_no; // thread-local errno + // + // Fields protected by _pthread_list_lock + // - struct _pthread *joiner; + TAILQ_ENTRY(_pthread) tl_plist; // global thread list [aligned] + struct pthread_join_context_s *tl_join_ctx; + void *tl_exit_value; + uint32_t tl_policy:8, + tl_joinable:1, + tl_joiner_cleans_up:1, + tl_has_custom_stack:1, + __tl_pad:21; + // MACH_PORT_NULL if no joiner + // tsd[_PTHREAD_TSD_SLOT_MACH_THREAD_SELF] when has a joiner + // MACH_PORT_DEAD if the thread exited + uint32_t tl_exit_gate; + struct sched_param tl_param; - struct sched_param param; // [aligned] + // + // Fields protected by pthread_t::lock + // - TAILQ_ENTRY(_pthread) plist; // global thread list [aligned] + _pthread_lock lock; + uint16_t max_tsd_key; + uint16_t inherit:8, + kernalloc:1, + schedset:1, + wqthread:1, + wqkillset:1, + wqoutsideqos:1, + __flags_pad:3; char pthread_name[MAXTHREADNAMESIZE]; // includes NUL [aligned] - void *stackaddr; // base of the stack (page aligned) - size_t stacksize; // size of stack (page multiple and >= PTHREAD_STACK_MIN) - - void* freeaddr; // stack/thread allocation base address - size_t freesize; // stack/thread allocation size - size_t guardsize; // guard page size in bytes + void *(*fun)(void *); // thread start routine + void *wq_kqid_ptr; // wqthreads (workloop) + void *arg; // thread start routine argument + int wq_nevents; // wqthreads (workloop / kevent) + uint16_t wq_retop; // wqthreads + uint8_t cancel_state; // whether the thread can be canceled [atomic] + uint8_t canceled; // 4597450 set if conformant cancelation happened + errno_t cancel_error; + errno_t err_no; // thread-local errno + + void *stackaddr; // base of the stack (page aligned) + void *stackbottom; // stackaddr - stacksize + void *freeaddr; // stack/thread allocation base address + size_t freesize; // stack/thread allocation size + size_t guardsize; // guard page size in bytes // tsd-base relative accessed elements __attribute__((aligned(8))) @@ -228,39 +241,39 @@ typedef struct _pthread { void *tsd[_EXTERNAL_POSIX_THREAD_KEYS_MAX + _INTERNAL_POSIX_THREAD_KEYS_MAX]; } *pthread_t; - +#define _PTHREAD_ATTR_REFILLMS_MAX ((2<<24) - 1) struct _pthread_attr_t { - long sig; - _pthread_lock lock; - uint32_t detached:8, + long sig; + size_t guardsize; // size in bytes of stack overflow guard area + void *stackaddr; // stack base; vm_page_size aligned + size_t stacksize; // stack size; multiple of vm_page_size and >= PTHREAD_STACK_MIN + union { + struct sched_param param; // [aligned] + unsigned long qosclass; // pthread_priority_t + }; + uint32_t + detached:8, inherit:8, policy:8, - fastpath:1, schedset:1, qosset:1, - unused:5; - struct sched_param param; // [aligned] - void *stackaddr; // stack base; vm_page_size aligned - size_t stacksize; // stack size; multiple of vm_page_size and >= PTHREAD_STACK_MIN - size_t guardsize; // size in bytes of 
stack overflow guard area - unsigned long qosclass; + policyset:1, + cpupercentset:1, + defaultguardpage:1, + unused:3; + uint32_t + cpupercent:8, + refillms:24; #if defined(__LP64__) - uint32_t _reserved[2]; + uint32_t _reserved[4]; #else - uint32_t _reserved[1]; + uint32_t _reserved[2]; #endif }; /* * Mutex attributes */ -#define _PTHREAD_MUTEX_POLICY_NONE 0 -#define _PTHREAD_MUTEX_POLICY_FAIRSHARE 1 -#define _PTHREAD_MUTEX_POLICY_FIRSTFIT 2 -#define _PTHREAD_MUTEX_POLICY_REALTIME 3 -#define _PTHREAD_MUTEX_POLICY_ADAPTIVE 4 -#define _PTHREAD_MUTEX_POLICY_PRIPROTECT 5 -#define _PTHREAD_MUTEX_POLICY_PRIINHERIT 6 #define _PTHREAD_MUTEXATTR_T typedef struct { @@ -269,7 +282,7 @@ typedef struct { uint32_t protocol:2, type:2, pshared:2, - policy:3, + opt:3, unused:23; } pthread_mutexattr_t; @@ -285,6 +298,21 @@ struct _pthread_mutex_options { unused:2, lock_count:16; }; +// +#define _PTHREAD_MUTEX_POLICY_LAST (PTHREAD_MUTEX_POLICY_FIRSTFIT_NP + 1) +#define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE 1 +#define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT 2 +#define _PTHREAD_MTX_OPT_POLICY_DEFAULT _PTHREAD_MTX_OPT_POLICY_FIRSTFIT +// The following _pthread_mutex_options defintions exist in synch_internal.h +// such that the kernel extension can test for flags. They must be kept in +// sync with the bit values in the struct above. +// _PTHREAD_MTX_OPT_PSHARED 0x010 +// _PTHREAD_MTX_OPT_NOTIFY 0x1000 +// _PTHREAD_MTX_OPT_MUTEX 0x2000 + +// The fixed mask is used to mask out portions of the mutex options that +// change on a regular basis (notify, lock_count). +#define _PTHREAD_MTX_OPT_FIXED_MASK 0x27ff typedef struct { long sig; @@ -429,12 +457,6 @@ _pthread_selfid_direct(void) #define _PTHREAD_KERN_MUTEX_SIG 0x34567812 /* */ #define _PTHREAD_KERN_RWLOCK_SIG 0x56781234 /* */ -#define _PTHREAD_CREATE_PARENT 4 -#define _PTHREAD_EXITED 8 -// 4597450: begin -#define _PTHREAD_WASCANCEL 0x10 -// 4597450: end - #if defined(DEBUG) #define _PTHREAD_MUTEX_OWNER_SELF pthread_self() #else @@ -454,11 +476,6 @@ extern boolean_t swtch_pri(int); /* Prototypes. */ /* Internal globals. */ -PTHREAD_NOEXPORT extern int __pthread_supported_features; - -/* Functions defined in machine-dependent files. 
*/ -PTHREAD_NOEXPORT void _pthread_setup(pthread_t th, void (*f)(pthread_t), void *sp, int suspended, int needresume); - PTHREAD_NOEXPORT void _pthread_tsd_cleanup(pthread_t self); PTHREAD_NOEXPORT int _pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t * flagp, uint32_t ** pmtxp, uint32_t * mgenp, uint32_t * ugenp); @@ -468,8 +485,8 @@ PTHREAD_NOEXPORT void* malloc(size_t); PTHREAD_NOEXPORT void free(void*); /* syscall interfaces */ -extern uint32_t __psynch_mutexwait(pthread_mutex_t * mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags); -extern uint32_t __psynch_mutexdrop(pthread_mutex_t * mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags); +extern uint32_t __psynch_mutexwait(_pthread_mutex * mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags); +extern uint32_t __psynch_mutexdrop(_pthread_mutex * mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags); extern uint32_t __psynch_cvbroad(pthread_cond_t * cv, uint64_t cvlsgen, uint64_t cvudgen, uint32_t flags, pthread_mutex_t * mutex, uint64_t mugen, uint64_t tid); extern uint32_t __psynch_cvsignal(pthread_cond_t * cv, uint64_t cvlsgen, uint32_t cvugen, int thread_port, pthread_mutex_t * mutex, uint64_t mugen, uint64_t tid, uint32_t flags); @@ -489,7 +506,9 @@ PTHREAD_EXTERN int __proc_info(int callnum, int pid, int flavor, uint64_t arg, void * buffer, int buffersize); -PTHREAD_NOEXPORT int _pthread_join_cleanup(pthread_t thread, void ** value_ptr, int conforming); +PTHREAD_NOEXPORT +void +_pthread_deallocate(pthread_t t, bool from_mach_thread); PTHREAD_NORETURN PTHREAD_NOEXPORT void @@ -499,6 +518,10 @@ PTHREAD_NORETURN PTHREAD_NOEXPORT void __pthread_abort_reason(const char *fmt, ...) __printflike(1,2); +PTHREAD_NOEXPORT +thread_qos_t +_pthread_qos_class_to_thread_qos(qos_class_t qos); + PTHREAD_NOEXPORT void _pthread_set_main_qos(pthread_priority_t qos); @@ -515,7 +538,7 @@ PTHREAD_EXPORT void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags); -PTHREAD_EXPORT +PTHREAD_NORETURN PTHREAD_EXPORT void _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int flags, int nkevents); @@ -531,9 +554,13 @@ PTHREAD_NOEXPORT_VARIANT void _pthread_clear_qos_tsd(mach_port_t thread_port); +#define PTHREAD_CONFORM_DARWIN_LEGACY 0 +#define PTHREAD_CONFORM_UNIX03_NOCANCEL 1 +#define PTHREAD_CONFORM_UNIX03_CANCELABLE 2 + PTHREAD_NOEXPORT_VARIANT void -_pthread_testcancel(pthread_t thread, int isconforming); +_pthread_testcancel(int conforming); PTHREAD_EXPORT void @@ -545,11 +572,11 @@ _pthread_markcancel_if_canceled(pthread_t thread, mach_port_t kport); PTHREAD_NOEXPORT void -_pthread_setcancelstate_exit(pthread_t self, void *value_ptr, int conforming); +_pthread_setcancelstate_exit(pthread_t self, void *value_ptr); PTHREAD_NOEXPORT -void * -_pthread_get_exit_value(pthread_t t, int conforming); +semaphore_t +_pthread_joiner_prepost_wake(pthread_t thread); PTHREAD_ALWAYS_INLINE static inline mach_port_t @@ -647,60 +674,54 @@ _pthread_rwlock_check_signature_init(_pthread_rwlock *rwlock) return (rwlock->sig == _PTHREAD_RWLOCK_SIG_init); } -/* ALWAYS called with list lock and return with list lock */ +/* + * ALWAYS called without list lock and return with list lock held on success + * + * This weird calling convention exists because this function will sometimes + * drop the lock, and it's best callers don't have to remember this. 
+ */ PTHREAD_ALWAYS_INLINE static inline bool -_pthread_is_valid_locked(pthread_t thread) +_pthread_validate_thread_and_list_lock(pthread_t thread) { pthread_t p; + if (thread == NULL) return false; loop: - TAILQ_FOREACH(p, &__pthread_head, plist) { - if (p == thread) { - int state = os_atomic_load(&p->cancel_state, relaxed); - if (state & _PTHREAD_CANCEL_INITIALIZED) { - return true; + _PTHREAD_LOCK(_pthread_list_lock); + TAILQ_FOREACH(p, &__pthread_head, tl_plist) { + if (p != thread) continue; + int state = os_atomic_load(&p->cancel_state, relaxed); + if (os_likely(state & _PTHREAD_CANCEL_INITIALIZED)) { + if (os_unlikely(p->sig != _PTHREAD_SIG)) { + PTHREAD_CLIENT_CRASH(0, "pthread_t was corrupted"); } - _PTHREAD_UNLOCK(_pthread_list_lock); - thread_switch(_pthread_kernel_thread(p), - SWITCH_OPTION_OSLOCK_DEPRESS, 1); - _PTHREAD_LOCK(_pthread_list_lock); - goto loop; + return true; } + _PTHREAD_UNLOCK(_pthread_list_lock); + thread_switch(_pthread_kernel_thread(p), + SWITCH_OPTION_OSLOCK_DEPRESS, 1); + goto loop; } + _PTHREAD_UNLOCK(_pthread_list_lock); return false; } -#define PTHREAD_IS_VALID_LOCK_THREAD 0x1 - PTHREAD_ALWAYS_INLINE static inline bool -_pthread_is_valid(pthread_t thread, int flags, mach_port_t *portp) +_pthread_is_valid(pthread_t thread, mach_port_t *portp) { mach_port_t kport = MACH_PORT_NULL; bool valid; - if (thread == NULL) { - return false; - } - if (thread == pthread_self()) { valid = true; kport = _pthread_kernel_thread(thread); - if (flags & PTHREAD_IS_VALID_LOCK_THREAD) { - _PTHREAD_LOCK(thread->lock); - } + } else if (!_pthread_validate_thread_and_list_lock(thread)) { + valid = false; } else { - _PTHREAD_LOCK(_pthread_list_lock); - if (_pthread_is_valid_locked(thread)) { - kport = _pthread_kernel_thread(thread); - valid = true; - if (flags & PTHREAD_IS_VALID_LOCK_THREAD) { - _PTHREAD_LOCK(thread->lock); - } - } else { - valid = false; - } + kport = _pthread_kernel_thread(thread); + valid = true; _PTHREAD_UNLOCK(_pthread_list_lock); } diff --git a/src/offsets.h b/src/offsets.h new file mode 100644 index 0000000..0e20385 --- /dev/null +++ b/src/offsets.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _POSIX_PTHREAD_OFFSETS_H +#define _POSIX_PTHREAD_OFFSETS_H + +#ifndef __ASSEMBLER__ +#define check_backward_offset(field, value) \ + _Static_assert(offsetof(struct _pthread, tsd) + value == \ + offsetof(struct _pthread, field), #value " is correct") +#define check_forward_offset(field, value) \ + _Static_assert(offsetof(struct _pthread, field) == value, \ + #value " is correct") +#else +#define check_backward_offset(field, value) +#define check_forward_offset(field, value) +#endif // __ASSEMBLER__ + +#if defined(__i386__) +#define _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET 140 +#define _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET 144 +#elif __LP64__ +#define _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET -48 +#define _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET -40 +#else +#define _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET -36 +#define _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET -32 +#endif + +#if defined(__i386__) +check_forward_offset(stackaddr, _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET); +check_forward_offset(stackbottom, _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET); +#else +check_backward_offset(stackaddr, _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET); +check_backward_offset(stackbottom, _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET); +#endif + +#endif /* _POSIX_PTHREAD_OFFSETS_H */ diff --git a/src/pthread.c b/src/pthread.c index 8e63bd3..c9c1b9b 100644 --- a/src/pthread.c +++ b/src/pthread.c @@ -56,6 +56,8 @@ #include "introspection_private.h" #include "qos_private.h" #include "tsd_private.h" +#include "pthread/stack_np.h" +#include "offsets.h" // included to validate the offsets at build time #include #include @@ -63,10 +65,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #define __APPLE_API_PRIVATE @@ -77,40 +81,46 @@ #include extern int __sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); + void *newp, size_t newlen); extern void __exit(int) __attribute__((noreturn)); extern int __pthread_kill(mach_port_t, int); -extern struct _pthread _thread; -extern int default_priority; +extern void _pthread_joiner_wake(pthread_t thread); +#if !VARIANT_DYLD +PTHREAD_NOEXPORT extern struct _pthread *_main_thread_ptr; +#define main_thread() (_main_thread_ptr) +#endif // VARIANT_DYLD -// -// Global variables -// +// Default stack size is 512KB; independent of the main thread's stack size. +#define DEFAULT_STACK_SIZE (size_t)(512 * 1024) -static void (*exitf)(int) = __exit; -PTHREAD_NOEXPORT void* (*_pthread_malloc)(size_t) = NULL; -PTHREAD_NOEXPORT void (*_pthread_free)(void *) = NULL; -#if PTHREAD_DEBUG_LOG -#include -int _pthread_debuglog; -uint64_t _pthread_debugstart; -#endif - -// This global should be used (carefully) by anyone needing to know if a -// pthread (other than the main thread) has been created. -int __is_threaded = 0; +// +// Global constants +// -int __unix_conforming = 0; +/* + * The pthread may be offset into a page. In that event, by contract + * with the kernel, the allocation will extend PTHREAD_SIZE from the + * start of the next page. There's also one page worth of allocation + * below stacksize for the guard page. 
+ */ +#define PTHREAD_SIZE ((size_t)mach_vm_round_page(sizeof(struct _pthread))) +#define PTHREAD_ALLOCADDR(stackaddr, stacksize) ((stackaddr - stacksize) - vm_page_size) +#define PTHREAD_ALLOCSIZE(stackaddr, stacksize) ((round_page((uintptr_t)stackaddr) + PTHREAD_SIZE) - (uintptr_t)PTHREAD_ALLOCADDR(stackaddr, stacksize)) -// _pthread_list_lock protects _pthread_count, access to the __pthread_head -// list, and the parentcheck, childrun and childexit flags of the pthread -// structure. Externally imported by pthread_cancelable.c. -PTHREAD_NOEXPORT _pthread_lock _pthread_list_lock = _PTHREAD_LOCK_INITIALIZER; -PTHREAD_NOEXPORT struct __pthread_list __pthread_head = TAILQ_HEAD_INITIALIZER(__pthread_head); -static int _pthread_count = 1; +static const pthread_attr_t _pthread_attr_default = { + .sig = _PTHREAD_ATTR_SIG, + .stacksize = 0, + .detached = PTHREAD_CREATE_JOINABLE, + .inherit = _PTHREAD_DEFAULT_INHERITSCHED, + .policy = _PTHREAD_DEFAULT_POLICY, + .defaultguardpage = true, + // compile time constant for _pthread_default_priority(0) + .qosclass = (1U << (THREAD_QOS_LEGACY - 1 + _PTHREAD_PRIORITY_QOS_CLASS_SHIFT)) | + ((uint8_t)-1 & _PTHREAD_PRIORITY_PRIORITY_MASK), +}; #if PTHREAD_LAYOUT_SPI @@ -124,315 +134,154 @@ const struct pthread_layout_offsets_s pthread_layout_offsets = { #endif // PTHREAD_LAYOUT_SPI // -// Static variables +// Global exported variables // -// Mach message notification that a thread needs to be recycled. -typedef struct _pthread_reap_msg_t { - mach_msg_header_t header; - pthread_t thread; - mach_msg_trailer_t trailer; -} pthread_reap_msg_t; +// This global should be used (carefully) by anyone needing to know if a +// pthread (other than the main thread) has been created. +int __is_threaded = 0; +int __unix_conforming = 0; -/* - * The pthread may be offset into a page. In that event, by contract - * with the kernel, the allocation will extend PTHREAD_SIZE from the - * start of the next page. There's also one page worth of allocation - * below stacksize for the guard page. - */ -#define PTHREAD_SIZE ((size_t)mach_vm_round_page(sizeof(struct _pthread))) -#define PTHREAD_ALLOCADDR(stackaddr, stacksize) ((stackaddr - stacksize) - vm_page_size) -#define PTHREAD_ALLOCSIZE(stackaddr, stacksize) ((round_page((uintptr_t)stackaddr) + PTHREAD_SIZE) - (uintptr_t)PTHREAD_ALLOCADDR(stackaddr, stacksize)) +// +// Global internal variables +// -static pthread_attr_t _pthread_attr_default = { }; +// _pthread_list_lock protects _pthread_count, access to the __pthread_head +// list. Externally imported by pthread_cancelable.c. 
+struct __pthread_list __pthread_head = TAILQ_HEAD_INITIALIZER(__pthread_head); +_pthread_lock _pthread_list_lock = _PTHREAD_LOCK_INITIALIZER; + +uint32_t _main_qos; +#if VARIANT_DYLD // The main thread's pthread_t -PTHREAD_NOEXPORT struct _pthread _thread __attribute__((aligned(64))) = { }; +struct _pthread _main_thread __attribute__((aligned(64))) = { }; +#define main_thread() (&_main_thread) +#else // VARIANT_DYLD +struct _pthread *_main_thread_ptr; +#endif // VARIANT_DYLD -PTHREAD_NOEXPORT int default_priority; -static int max_priority; -static int min_priority; +#if PTHREAD_DEBUG_LOG +#include +int _pthread_debuglog; +uint64_t _pthread_debugstart; +#endif + +// +// Global static variables +// +static bool __workq_newapi; +static uint8_t default_priority; +#if !VARIANT_DYLD +static uint8_t max_priority; +static uint8_t min_priority; +#endif // !VARIANT_DYLD +static int _pthread_count = 1; static int pthread_concurrency; +static uintptr_t _pthread_ptr_munge_token; + +static void (*exitf)(int) = __exit; +#if !VARIANT_DYLD +static void *(*_pthread_malloc)(size_t) = NULL; +static void (*_pthread_free)(void *) = NULL; +#endif // !VARIANT_DYLD // work queue support data -static void (*__libdispatch_workerfunction)(pthread_priority_t) = NULL; -static void (*__libdispatch_keventfunction)(void **events, int *nevents) = NULL; -static void (*__libdispatch_workloopfunction)(uint64_t *workloop_id, void **events, int *nevents) = NULL; +PTHREAD_NORETURN +static void +__pthread_invalid_keventfunction(void **events, int *nevents) +{ + PTHREAD_CLIENT_CRASH(0, "Invalid kqworkq setup"); +} + +PTHREAD_NORETURN +static void +__pthread_invalid_workloopfunction(uint64_t *workloop_id, void **events, int *nevents) +{ + PTHREAD_CLIENT_CRASH(0, "Invalid kqwl setup"); +} +static pthread_workqueue_function2_t __libdispatch_workerfunction; +static pthread_workqueue_function_kevent_t __libdispatch_keventfunction = &__pthread_invalid_keventfunction; +static pthread_workqueue_function_workloop_t __libdispatch_workloopfunction = &__pthread_invalid_workloopfunction; static int __libdispatch_offset; +static int __pthread_supported_features; // supported feature set -// supported feature set -int __pthread_supported_features; -static bool __workq_newapi; +#if defined(__i386__) || defined(__x86_64__) +static mach_vm_address_t __pthread_stack_hint = 0xB0000000; +#else +#error no __pthread_stack_hint for this architecture +#endif // // Function prototypes // // pthread primitives -static int _pthread_allocate(pthread_t *thread, const pthread_attr_t *attrs, void **stack); -static int _pthread_deallocate(pthread_t t); - -static void _pthread_terminate_invoke(pthread_t t); - -static inline void _pthread_struct_init(pthread_t t, - const pthread_attr_t *attrs, - void *stack, - size_t stacksize, - void *freeaddr, - size_t freesize); +static inline void _pthread_struct_init(pthread_t t, const pthread_attr_t *attrs, + void *stack, size_t stacksize, void *freeaddr, size_t freesize); +#if VARIANT_DYLD +static void _pthread_set_self_dyld(void); +#endif // VARIANT_DYLD static inline void _pthread_set_self_internal(pthread_t, bool needs_tsd_base_set); static void _pthread_dealloc_reply_port(pthread_t t); static void _pthread_dealloc_special_reply_port(pthread_t t); -static inline void __pthread_add_thread(pthread_t t, const pthread_attr_t *attr, bool parent, bool from_mach_thread); -static inline int __pthread_remove_thread(pthread_t t, bool child, bool *should_exit); +static inline void __pthread_started_thread(pthread_t t); static void 
_pthread_exit(pthread_t self, void *value_ptr) __dead2; -static inline void _pthread_introspection_thread_create(pthread_t t, bool destroy); +static inline void _pthread_introspection_thread_create(pthread_t t); static inline void _pthread_introspection_thread_start(pthread_t t); -static inline void _pthread_introspection_thread_terminate(pthread_t t, void *freeaddr, size_t freesize, bool destroy); +static inline void _pthread_introspection_thread_terminate(pthread_t t); static inline void _pthread_introspection_thread_destroy(pthread_t t); extern void _pthread_set_self(pthread_t); extern void start_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unused, int reuse); // trampoline into _pthread_wqthread extern void thread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags); // trampoline into _pthread_start -/* Compatibility: previous pthread API used WORKQUEUE_OVERCOMMIT to request overcommit threads from - * the kernel. This definition is kept here, in userspace only, to perform the compatibility shimm - * from old API requests to the new kext conventions. - */ -#define WORKQUEUE_OVERCOMMIT 0x10000 - /* * Flags filed passed to bsdthread_create and back in pthread_start -31 <---------------------------------> 0 -_________________________________________ -| flags(8) | policy(8) | importance(16) | ------------------------------------------ -*/ - -#define PTHREAD_START_CUSTOM 0x01000000 + * 31 <---------------------------------> 0 + * _________________________________________ + * | flags(8) | policy(8) | importance(16) | + * ----------------------------------------- + */ +#define PTHREAD_START_CUSTOM 0x01000000 // #define PTHREAD_START_SETSCHED 0x02000000 -#define PTHREAD_START_DETACHED 0x04000000 +// was PTHREAD_START_DETACHED 0x04000000 #define PTHREAD_START_QOSCLASS 0x08000000 #define PTHREAD_START_TSD_BASE_SET 0x10000000 +#define PTHREAD_START_SUSPENDED 0x20000000 #define PTHREAD_START_QOSCLASS_MASK 0x00ffffff #define PTHREAD_START_POLICY_BITSHIFT 16 #define PTHREAD_START_POLICY_MASK 0xff #define PTHREAD_START_IMPORTANCE_MASK 0xffff -static int pthread_setschedparam_internal(pthread_t, mach_port_t, int, const struct sched_param *); +#if (!defined(__OPEN_SOURCE__) && TARGET_OS_OSX) || OS_VARIANT_RESOLVED // 40703288 +static int pthread_setschedparam_internal(pthread_t, mach_port_t, int, + const struct sched_param *); +#endif + extern pthread_t __bsdthread_create(void *(*func)(void *), void * func_arg, void * stack, pthread_t thread, unsigned int flags); extern int __bsdthread_register(void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), void (*)(pthread_t, mach_port_t, void *, void *, int), int,void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), int32_t *,__uint64_t); extern int __bsdthread_terminate(void * freeaddr, size_t freesize, mach_port_t kport, mach_port_t joinsem); extern __uint64_t __thread_selfid( void ); -extern int __workq_open(void); -extern int __workq_kernreturn(int, void *, int, int); - -#if defined(__i386__) || defined(__x86_64__) -static const mach_vm_address_t PTHREAD_STACK_HINT = 0xB0000000; +#if __LP64__ +_Static_assert(offsetof(struct _pthread, tsd) == 224, "TSD LP64 offset"); #else -#error no PTHREAD_STACK_HINT for this architecture +_Static_assert(offsetof(struct _pthread, tsd) == 176, "TSD ILP32 offset"); #endif - -// Check that offsets of _PTHREAD_STRUCT_DIRECT_*_OFFSET values hasn't changed _Static_assert(offsetof(struct 
_pthread, tsd) + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET == offsetof(struct _pthread, thread_id), "_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET is correct"); -// Allocate a thread structure, stack and guard page. -// -// The thread structure may optionally be placed in the same allocation as the -// stack, residing above the top of the stack. This cannot be done if a -// custom stack address is provided. -// -// Similarly the guard page cannot be allocated if a custom stack address is -// provided. -// -// The allocated thread structure is initialized with values that indicate how -// it should be freed. - -static int -_pthread_allocate(pthread_t *thread, const pthread_attr_t *attrs, void **stack) -{ - int res; - kern_return_t kr; - pthread_t t = NULL; - mach_vm_address_t allocaddr = PTHREAD_STACK_HINT; - size_t allocsize = 0; - size_t guardsize = 0; - size_t stacksize = 0; - - PTHREAD_ASSERT(attrs->stacksize >= PTHREAD_STACK_MIN); - - *thread = NULL; - *stack = NULL; - - // Allocate a pthread structure if necessary - - if (attrs->stackaddr != NULL) { - PTHREAD_ASSERT(((uintptr_t)attrs->stackaddr % vm_page_size) == 0); - *stack = attrs->stackaddr; - allocsize = PTHREAD_SIZE; - } else { - guardsize = attrs->guardsize; - stacksize = attrs->stacksize; - allocsize = stacksize + guardsize + PTHREAD_SIZE; - } - - kr = mach_vm_map(mach_task_self(), - &allocaddr, - allocsize, - vm_page_size - 1, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, - MEMORY_OBJECT_NULL, - 0, - FALSE, - VM_PROT_DEFAULT, - VM_PROT_ALL, - VM_INHERIT_DEFAULT); - - if (kr != KERN_SUCCESS) { - kr = mach_vm_allocate(mach_task_self(), - &allocaddr, - allocsize, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE); - } - - if (kr == KERN_SUCCESS) { - // The stack grows down. - // Set the guard page at the lowest address of the - // newly allocated stack. Return the highest address - // of the stack. - if (guardsize) { - (void)mach_vm_protect(mach_task_self(), allocaddr, guardsize, FALSE, VM_PROT_NONE); - } - - // Thread structure resides at the top of the stack. - t = (void *)(allocaddr + stacksize + guardsize); - if (stacksize) { - // Returns the top of the stack. - *stack = t; - } - } - - if (t != NULL) { - _pthread_struct_init(t, attrs, - *stack, attrs->stacksize, - allocaddr, allocsize); - *thread = t; - res = 0; - } else { - res = EAGAIN; - } - return res; -} - -static int -_pthread_deallocate(pthread_t t) -{ - // Don't free the main thread. - if (t != &_thread) { - kern_return_t ret; - ret = mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize); - PTHREAD_ASSERT(ret == KERN_SUCCESS); - } - return 0; -} - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wreturn-stack-address" - -PTHREAD_NOINLINE -static void* -_pthread_current_stack_address(void) -{ - int a; - return &a; -} - -#pragma clang diagnostic pop - -// Terminates the thread if called from the currently running thread. -PTHREAD_NORETURN PTHREAD_NOINLINE PTHREAD_NOT_TAIL_CALLED -static void -_pthread_terminate(pthread_t t) -{ - PTHREAD_ASSERT(t == pthread_self()); - - uintptr_t freeaddr = (uintptr_t)t->freeaddr; - size_t freesize = t->freesize; - - // the size of just the stack - size_t freesize_stack = t->freesize; - - // We usually pass our structure+stack to bsdthread_terminate to free, but - // if we get told to keep the pthread_t structure around then we need to - // adjust the free size and addr in the pthread_t to just refer to the - // structure and not the stack. 
If we do end up deallocating the - // structure, this is useless work since no one can read the result, but we - // can't do it after the call to pthread_remove_thread because it isn't - // safe to dereference t after that. - if ((void*)t > t->freeaddr && (void*)t < t->freeaddr + t->freesize){ - // Check to ensure the pthread structure itself is part of the - // allocation described by freeaddr/freesize, in which case we split and - // only deallocate the area below the pthread structure. In the event of a - // custom stack, the freeaddr/size will be the pthread structure itself, in - // which case we shouldn't free anything (the final else case). - freesize_stack = trunc_page((uintptr_t)t - (uintptr_t)freeaddr); - - // describe just the remainder for deallocation when the pthread_t goes away - t->freeaddr += freesize_stack; - t->freesize -= freesize_stack; - } else if (t == &_thread){ - freeaddr = t->stackaddr - pthread_get_stacksize_np(t); - uintptr_t stackborder = trunc_page((uintptr_t)_pthread_current_stack_address()); - freesize_stack = stackborder - freeaddr; - } else { - freesize_stack = 0; - } - - mach_port_t kport = _pthread_kernel_thread(t); - semaphore_t joinsem = t->joiner_notify; - - _pthread_dealloc_special_reply_port(t); - _pthread_dealloc_reply_port(t); +#pragma mark pthread attrs - // After the call to __pthread_remove_thread, it is not safe to - // dereference the pthread_t structure. - - bool destroy, should_exit; - destroy = (__pthread_remove_thread(t, true, &should_exit) != EBUSY); - - if (!destroy || t == &_thread) { - // Use the adjusted freesize of just the stack that we computed above. - freesize = freesize_stack; - } - - // Check if there is nothing to free because the thread has a custom - // stack allocation and is joinable. - if (freesize == 0) { - freeaddr = 0; - } - _pthread_introspection_thread_terminate(t, freeaddr, freesize, destroy); - if (should_exit) { - exitf(0); - } - - __bsdthread_terminate((void *)freeaddr, freesize, kport, joinsem); - PTHREAD_ABORT("thread %p didn't terminate", t); -} - -PTHREAD_NORETURN -static void -_pthread_terminate_invoke(pthread_t t) -{ - _pthread_terminate(t); -} +_Static_assert(sizeof(struct _pthread_attr_t) == sizeof(__darwin_pthread_attr_t), + "internal pthread_attr_t == external pthread_attr_t"); int pthread_attr_destroy(pthread_attr_t *attr) @@ -467,12 +316,24 @@ pthread_attr_getinheritsched(const pthread_attr_t *attr, int *inheritsched) return ret; } +static PTHREAD_ALWAYS_INLINE void +_pthread_attr_get_schedparam(const pthread_attr_t *attr, + struct sched_param *param) +{ + if (attr->schedset) { + *param = attr->param; + } else { + param->sched_priority = default_priority; + param->quantum = 10; /* quantum isn't public yet */ + } +} + int pthread_attr_getschedparam(const pthread_attr_t *attr, struct sched_param *param) { int ret = EINVAL; if (attr->sig == _PTHREAD_ATTR_SIG) { - *param = attr->param; + _pthread_attr_get_schedparam(attr, param); ret = 0; } return ret; @@ -489,24 +350,10 @@ pthread_attr_getschedpolicy(const pthread_attr_t *attr, int *policy) return ret; } -// Default stack size is 512KB; independent of the main thread's stack size. 
-static const size_t DEFAULT_STACK_SIZE = 512 * 1024; - int pthread_attr_init(pthread_attr_t *attr) { - attr->stacksize = DEFAULT_STACK_SIZE; - attr->stackaddr = NULL; - attr->sig = _PTHREAD_ATTR_SIG; - attr->param.sched_priority = default_priority; - attr->param.quantum = 10; /* quantum isn't public yet */ - attr->detached = PTHREAD_CREATE_JOINABLE; - attr->inherit = _PTHREAD_DEFAULT_INHERITSCHED; - attr->policy = _PTHREAD_DEFAULT_POLICY; - attr->fastpath = 1; - attr->schedset = 0; - attr->guardsize = vm_page_size; - attr->qosclass = _pthread_priority_make_newest(QOS_CLASS_DEFAULT, 0, 0); + *attr = _pthread_attr_default; return 0; } @@ -515,8 +362,8 @@ pthread_attr_setdetachstate(pthread_attr_t *attr, int detachstate) { int ret = EINVAL; if (attr->sig == _PTHREAD_ATTR_SIG && - (detachstate == PTHREAD_CREATE_JOINABLE || - detachstate == PTHREAD_CREATE_DETACHED)) { + (detachstate == PTHREAD_CREATE_JOINABLE || + detachstate == PTHREAD_CREATE_DETACHED)) { attr->detached = detachstate; ret = 0; } @@ -528,8 +375,8 @@ pthread_attr_setinheritsched(pthread_attr_t *attr, int inheritsched) { int ret = EINVAL; if (attr->sig == _PTHREAD_ATTR_SIG && - (inheritsched == PTHREAD_INHERIT_SCHED || - inheritsched == PTHREAD_EXPLICIT_SCHED)) { + (inheritsched == PTHREAD_INHERIT_SCHED || + inheritsched == PTHREAD_EXPLICIT_SCHED)) { attr->inherit = inheritsched; ret = 0; } @@ -553,12 +400,14 @@ int pthread_attr_setschedpolicy(pthread_attr_t *attr, int policy) { int ret = EINVAL; - if (attr->sig == _PTHREAD_ATTR_SIG && - (policy == SCHED_OTHER || - policy == SCHED_RR || - policy == SCHED_FIFO)) { + if (attr->sig == _PTHREAD_ATTR_SIG && (policy == SCHED_OTHER || + policy == SCHED_RR || policy == SCHED_FIFO)) { + if (!_PTHREAD_POLICY_IS_FIXEDPRI(policy)) { + /* non-fixedpri policy should remove cpupercent */ + attr->cpupercentset = 0; + } attr->policy = policy; - attr->schedset = 1; + attr->policyset = 1; ret = 0; } return ret; @@ -606,21 +455,27 @@ pthread_attr_setstackaddr(pthread_attr_t *attr, void *stackaddr) { int ret = EINVAL; if (attr->sig == _PTHREAD_ATTR_SIG && - ((uintptr_t)stackaddr % vm_page_size) == 0) { + ((uintptr_t)stackaddr % vm_page_size) == 0) { attr->stackaddr = stackaddr; - attr->fastpath = 0; + attr->defaultguardpage = false; attr->guardsize = 0; ret = 0; } return ret; } +static inline size_t +_pthread_attr_stacksize(const pthread_attr_t *attr) +{ + return attr->stacksize ? attr->stacksize : DEFAULT_STACK_SIZE; +} + int pthread_attr_getstacksize(const pthread_attr_t *attr, size_t *stacksize) { int ret = EINVAL; if (attr->sig == _PTHREAD_ATTR_SIG) { - *stacksize = attr->stacksize; + *stacksize = _pthread_attr_stacksize(attr); ret = 0; } return ret; @@ -631,70 +486,356 @@ pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize) { int ret = EINVAL; if (attr->sig == _PTHREAD_ATTR_SIG && - (stacksize % vm_page_size) == 0 && - stacksize >= PTHREAD_STACK_MIN) { + (stacksize % vm_page_size) == 0 && + stacksize >= PTHREAD_STACK_MIN) { + attr->stacksize = stacksize; + ret = 0; + } + return ret; +} + +int +pthread_attr_getstack(const pthread_attr_t *attr, void **stackaddr, size_t * stacksize) +{ + int ret = EINVAL; + if (attr->sig == _PTHREAD_ATTR_SIG) { + *stackaddr = (void *)((uintptr_t)attr->stackaddr - attr->stacksize); + *stacksize = _pthread_attr_stacksize(attr); + ret = 0; + } + return ret; +} + +// Per SUSv3, the stackaddr is the base address, the lowest addressable byte +// address. This is not the same as in pthread_attr_setstackaddr. 
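A hedged illustration of the convention difference called out in the comment above (the helper name and allocation scheme are invented for the example): pthread_attr_setstack() takes the lowest addressable byte of the region, while the older pthread_attr_setstackaddr() takes the high address the stack grows down from, so both attrs below end up describing the same stack.

#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

static int
equivalent_stack_attrs(pthread_attr_t *a, pthread_attr_t *b, size_t size)
{
	void *base;
	// size is assumed to be a multiple of the page size and >= PTHREAD_STACK_MIN
	if (posix_memalign(&base, (size_t)getpagesize(), size) != 0) {
		return -1;
	}
	pthread_attr_init(a);
	pthread_attr_init(b);
	// SUSv3 interface: base (lowest) address plus size.
	pthread_attr_setstack(a, base, size);
	// Legacy Darwin interface: the top address the stack grows down from.
	pthread_attr_setstackaddr(b, (char *)base + size);
	pthread_attr_setstacksize(b, size);
	return 0;
}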
+int +pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize) +{ + int ret = EINVAL; + if (attr->sig == _PTHREAD_ATTR_SIG && + ((uintptr_t)stackaddr % vm_page_size) == 0 && + (stacksize % vm_page_size) == 0 && + stacksize >= PTHREAD_STACK_MIN) { + attr->stackaddr = (void *)((uintptr_t)stackaddr + stacksize); attr->stacksize = stacksize; ret = 0; } - return ret; + return ret; +} + +int +pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize) +{ + int ret = EINVAL; + if (attr->sig == _PTHREAD_ATTR_SIG && (guardsize % vm_page_size) == 0) { + /* Guardsize of 0 is valid, means no guard */ + attr->defaultguardpage = false; + attr->guardsize = guardsize; + ret = 0; + } + return ret; +} + +static inline size_t +_pthread_attr_guardsize(const pthread_attr_t *attr) +{ + return attr->defaultguardpage ? vm_page_size : attr->guardsize; +} + +int +pthread_attr_getguardsize(const pthread_attr_t *attr, size_t *guardsize) +{ + int ret = EINVAL; + if (attr->sig == _PTHREAD_ATTR_SIG) { + *guardsize = _pthread_attr_guardsize(attr); + ret = 0; + } + return ret; +} + +int +pthread_attr_setcpupercent_np(pthread_attr_t *attr, int percent, + unsigned long refillms) +{ + int ret = EINVAL; + if (attr->sig == _PTHREAD_ATTR_SIG && percent < UINT8_MAX && + refillms < _PTHREAD_ATTR_REFILLMS_MAX && attr->policyset && + _PTHREAD_POLICY_IS_FIXEDPRI(attr->policy)) { + attr->cpupercent = percent; + attr->refillms = (uint32_t)(refillms & 0x00ffffff); + attr->cpupercentset = 1; + ret = 0; + } + return ret; +} + +#pragma mark pthread lifetime + +// Allocate a thread structure, stack and guard page. +// +// The thread structure may optionally be placed in the same allocation as the +// stack, residing above the top of the stack. This cannot be done if a +// custom stack address is provided. +// +// Similarly the guard page cannot be allocated if a custom stack address is +// provided. +// +// The allocated thread structure is initialized with values that indicate how +// it should be freed. + +static pthread_t +_pthread_allocate(const pthread_attr_t *attrs, void **stack) +{ + mach_vm_address_t allocaddr = __pthread_stack_hint; + size_t allocsize, guardsize, stacksize; + kern_return_t kr; + pthread_t t; + + PTHREAD_ASSERT(attrs->stacksize == 0 || + attrs->stacksize >= PTHREAD_STACK_MIN); + + // Allocate a pthread structure if necessary + + if (attrs->stackaddr != NULL) { + PTHREAD_ASSERT(((uintptr_t)attrs->stackaddr % vm_page_size) == 0); + allocsize = PTHREAD_SIZE; + guardsize = 0; + // if the attrs struct specifies a custom + // stack address but not a custom size, using ->stacksize here instead + // of _pthread_attr_stacksize stores stacksize as zero, indicating + // that the stack size is unknown. + stacksize = attrs->stacksize; + } else { + guardsize = _pthread_attr_guardsize(attrs); + stacksize = _pthread_attr_stacksize(attrs) + PTHREAD_T_OFFSET; + allocsize = stacksize + guardsize + PTHREAD_SIZE; + allocsize = mach_vm_round_page(allocsize); + } + + kr = mach_vm_map(mach_task_self(), &allocaddr, allocsize, vm_page_size - 1, + VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, MEMORY_OBJECT_NULL, + 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); + + if (kr != KERN_SUCCESS) { + kr = mach_vm_allocate(mach_task_self(), &allocaddr, allocsize, + VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE); + } + if (kr != KERN_SUCCESS) { + *stack = NULL; + return NULL; + } + + // The stack grows down. + // Set the guard page at the lowest address of the + // newly allocated stack. 
Return the highest address + // of the stack. + if (guardsize) { + (void)mach_vm_protect(mach_task_self(), allocaddr, guardsize, + FALSE, VM_PROT_NONE); + } + + // Thread structure resides at the top of the stack (when using a + // custom stack, allocsize == PTHREAD_SIZE, so places the pthread_t + // at allocaddr). + t = (pthread_t)(allocaddr + allocsize - PTHREAD_SIZE); + if (attrs->stackaddr) { + *stack = attrs->stackaddr; + } else { + *stack = t; + } + + _pthread_struct_init(t, attrs, *stack, stacksize, allocaddr, allocsize); + return t; +} + +PTHREAD_NOINLINE +void +_pthread_deallocate(pthread_t t, bool from_mach_thread) +{ + kern_return_t ret; + + // Don't free the main thread. + if (t != main_thread()) { + if (!from_mach_thread) { // see __pthread_add_thread + _pthread_introspection_thread_destroy(t); + } + ret = mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize); + PTHREAD_ASSERT(ret == KERN_SUCCESS); + } } -int -pthread_attr_getstack(const pthread_attr_t *attr, void **stackaddr, size_t * stacksize) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreturn-stack-address" + +PTHREAD_NOINLINE +static void* +_pthread_current_stack_address(void) { - int ret = EINVAL; - if (attr->sig == _PTHREAD_ATTR_SIG) { - *stackaddr = (void *)((uintptr_t)attr->stackaddr - attr->stacksize); - *stacksize = attr->stacksize; - ret = 0; - } - return ret; + int a; + return &a; } -// Per SUSv3, the stackaddr is the base address, the lowest addressable byte -// address. This is not the same as in pthread_attr_setstackaddr. -int -pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize) +#pragma clang diagnostic pop + +void +_pthread_joiner_wake(pthread_t thread) { - int ret = EINVAL; - if (attr->sig == _PTHREAD_ATTR_SIG && - ((uintptr_t)stackaddr % vm_page_size) == 0 && - (stacksize % vm_page_size) == 0 && - stacksize >= PTHREAD_STACK_MIN) { - attr->stackaddr = (void *)((uintptr_t)stackaddr + stacksize); - attr->stacksize = stacksize; - attr->fastpath = 0; - ret = 0; + uint32_t *exit_gate = &thread->tl_exit_gate; + + for (;;) { + int ret = __ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, exit_gate, 0); + if (ret == 0 || ret == -ENOENT) { + return; + } + if (ret != -EINTR) { + PTHREAD_INTERNAL_CRASH(-ret, "pthread_join() wake failure"); + } } - return ret; } -int -pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize) +// Terminates the thread if called from the currently running thread. +PTHREAD_NORETURN PTHREAD_NOINLINE PTHREAD_NOT_TAIL_CALLED +static void +_pthread_terminate(pthread_t t, void *exit_value) { - int ret = EINVAL; - if (attr->sig == _PTHREAD_ATTR_SIG) { - /* Guardsize of 0 is valid, ot means no guard */ - if ((guardsize % vm_page_size) == 0) { - attr->guardsize = guardsize; - attr->fastpath = 0; - ret = 0; + PTHREAD_ASSERT(t == pthread_self()); + + _pthread_introspection_thread_terminate(t); + + uintptr_t freeaddr = (uintptr_t)t->freeaddr; + size_t freesize = t->freesize; + bool should_exit; + + // the size of just the stack + size_t freesize_stack = t->freesize; + + // We usually pass our structure+stack to bsdthread_terminate to free, but + // if we get told to keep the pthread_t structure around then we need to + // adjust the free size and addr in the pthread_t to just refer to the + // structure and not the stack. If we do end up deallocating the + // structure, this is useless work since no one can read the result, but we + // can't do it after the call to pthread_remove_thread because it isn't + // safe to dereference t after that. 
+ if ((void*)t > t->freeaddr && (void*)t < t->freeaddr + t->freesize){ + // Check to ensure the pthread structure itself is part of the + // allocation described by freeaddr/freesize, in which case we split and + // only deallocate the area below the pthread structure. In the event of a + // custom stack, the freeaddr/size will be the pthread structure itself, in + // which case we shouldn't free anything (the final else case). + freesize_stack = trunc_page((uintptr_t)t - (uintptr_t)freeaddr); + + // describe just the remainder for deallocation when the pthread_t goes away + t->freeaddr += freesize_stack; + t->freesize -= freesize_stack; + } else if (t == main_thread()) { + freeaddr = t->stackaddr - pthread_get_stacksize_np(t); + uintptr_t stackborder = trunc_page((uintptr_t)_pthread_current_stack_address()); + freesize_stack = stackborder - freeaddr; + } else { + freesize_stack = 0; + } + + mach_port_t kport = _pthread_kernel_thread(t); + bool keep_thread_struct = false, needs_wake = false; + semaphore_t custom_stack_sema = MACH_PORT_NULL; + + _pthread_dealloc_special_reply_port(t); + _pthread_dealloc_reply_port(t); + + _PTHREAD_LOCK(_pthread_list_lock); + + // This piece of code interacts with pthread_join. It will always: + // - set tl_exit_gate to MACH_PORT_DEAD (thread exited) + // - set tl_exit_value to the value passed to pthread_exit() + // - decrement _pthread_count, so that we can exit the process when all + // threads exited even if not all of them were joined. + t->tl_exit_gate = MACH_PORT_DEAD; + t->tl_exit_value = exit_value; + should_exit = (--_pthread_count <= 0); + + // If we see a joiner, we prepost that the join has to succeed, + // and the joiner is committed to finish (even if it was canceled) + if (t->tl_join_ctx) { + custom_stack_sema = _pthread_joiner_prepost_wake(t); // unsets tl_joinable + needs_wake = true; + } + + // Joinable threads that have no joiner yet are kept on the thread list + // so that pthread_join() can later discover the thread when it is joined, + // and will have to do the pthread_t cleanup. + if (t->tl_joinable) { + t->tl_joiner_cleans_up = keep_thread_struct = true; + } else { + TAILQ_REMOVE(&__pthread_head, t, tl_plist); + } + + _PTHREAD_UNLOCK(_pthread_list_lock); + + if (needs_wake) { + // When we found a waiter, we want to drop the very contended list lock + // before we do the syscall in _pthread_joiner_wake(). Then, we decide + // who gets to cleanup the pthread_t between the joiner and the exiting + // thread: + // - the joiner tries to set tl_join_ctx to NULL + // - the exiting thread tries to set tl_joiner_cleans_up to true + // Whoever does it first commits the other guy to cleanup the pthread_t + _pthread_joiner_wake(t); + _PTHREAD_LOCK(_pthread_list_lock); + if (t->tl_join_ctx) { + t->tl_joiner_cleans_up = true; + keep_thread_struct = true; } + _PTHREAD_UNLOCK(_pthread_list_lock); } - return ret; -} -int -pthread_attr_getguardsize(const pthread_attr_t *attr, size_t *guardsize) -{ - int ret = EINVAL; - if (attr->sig == _PTHREAD_ATTR_SIG) { - *guardsize = attr->guardsize; - ret = 0; + // + // /!\ dereferencing `t` past this point is not safe /!\ + // + + if (keep_thread_struct || t == main_thread()) { + // Use the adjusted freesize of just the stack that we computed above. + freesize = freesize_stack; + } else { + _pthread_introspection_thread_destroy(t); } - return ret; + + // Check if there is nothing to free because the thread has a custom + // stack allocation and is joinable. 
+ if (freesize == 0) { + freeaddr = 0; + } + if (should_exit) { + exitf(0); + } + __bsdthread_terminate((void *)freeaddr, freesize, kport, custom_stack_sema); + PTHREAD_INTERNAL_CRASH(t, "thread didn't terminate"); +} + +PTHREAD_NORETURN +static void +_pthread_terminate_invoke(pthread_t t, void *exit_value) +{ +#if PTHREAD_T_OFFSET + void *p = NULL; + // During pthread termination there is a race + // between pthread_join and pthread_terminate; if the joiner is responsible + // for cleaning up the pthread_t struct, then it may destroy some part of the + // stack with it on 16k OSes. So that this doesn't cause _pthread_terminate() + // to crash because its stack has been removed from under its feet, just make + // sure termination happens in a part of the stack that is not on the same + // page as the pthread_t. + if (trunc_page((uintptr_t)__builtin_frame_address(0)) == + trunc_page((uintptr_t)t)) { + p = alloca(PTHREAD_T_OFFSET); + } + // And this __asm__ volatile is needed to stop the compiler from optimising + // away the alloca() completely. + __asm__ volatile ("" : : "r"(p) ); +#endif + _pthread_terminate(t, exit_value); } +#pragma mark pthread start / body /* * Create and start execution of a new thread. @@ -704,51 +845,22 @@ static void _pthread_body(pthread_t self, bool needs_tsd_base_set) { _pthread_set_self_internal(self, needs_tsd_base_set); - __pthread_add_thread(self, NULL, false, false); - void *result = (self->fun)(self->arg); - - _pthread_exit(self, result); + __pthread_started_thread(self); + _pthread_exit(self, (self->fun)(self->arg)); } PTHREAD_NORETURN void -_pthread_start(pthread_t self, - mach_port_t kport, - void *(*fun)(void *), - void *arg, - size_t stacksize, - unsigned int pflags) -{ - if ((pflags & PTHREAD_START_CUSTOM) == 0) { - void *stackaddr = self; - _pthread_struct_init(self, &_pthread_attr_default, - stackaddr, stacksize, - PTHREAD_ALLOCADDR(stackaddr, stacksize), PTHREAD_ALLOCSIZE(stackaddr, stacksize)); - - if (pflags & PTHREAD_START_SETSCHED) { - self->policy = ((pflags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK); - self->param.sched_priority = (pflags & PTHREAD_START_IMPORTANCE_MASK); - } - - if ((pflags & PTHREAD_START_DETACHED) == PTHREAD_START_DETACHED) { - self->detached &= ~PTHREAD_CREATE_JOINABLE; - self->detached |= PTHREAD_CREATE_DETACHED; - } - } - - if ((pflags & PTHREAD_START_QOSCLASS) != 0) { - /* The QoS class is cached in the TSD of the pthread, so to reflect the - * class that the kernel brought us up at, the TSD must be primed from the - * flags parameter. - */ - self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (pflags & PTHREAD_START_QOSCLASS_MASK); - } else { - /* Give the thread a default QoS tier, of zero. 
*/ - self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); - } - +_pthread_start(pthread_t self, mach_port_t kport, + __unused void *(*fun)(void *), __unused void *arg, + __unused size_t stacksize, unsigned int pflags) +{ bool thread_tsd_bsd_set = (bool)(pflags & PTHREAD_START_TSD_BASE_SET); + if (os_unlikely(pflags & PTHREAD_START_SUSPENDED)) { + PTHREAD_INTERNAL_CRASH(0, + "kernel without PTHREAD_START_SUSPENDED support"); + } #if DEBUG PTHREAD_ASSERT(MACH_PORT_VALID(kport)); PTHREAD_ASSERT(_pthread_kernel_thread(self) == kport); @@ -756,20 +868,13 @@ _pthread_start(pthread_t self, // will mark the thread initialized _pthread_markcancel_if_canceled(self, kport); - self->fun = fun; - self->arg = arg; - _pthread_body(self, !thread_tsd_bsd_set); } PTHREAD_ALWAYS_INLINE static inline void -_pthread_struct_init(pthread_t t, - const pthread_attr_t *attrs, - void *stackaddr, - size_t stacksize, - void *freeaddr, - size_t freesize) +_pthread_struct_init(pthread_t t, const pthread_attr_t *attrs, + void *stackaddr, size_t stacksize, void *freeaddr, size_t freesize) { #if DEBUG PTHREAD_ASSERT(t->sig != _PTHREAD_SIG); @@ -777,23 +882,34 @@ _pthread_struct_init(pthread_t t, t->sig = _PTHREAD_SIG; t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = t; - t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); + t->tsd[_PTHREAD_TSD_SLOT_ERRNO] = &t->err_no; + if (attrs->schedset == 0) { + t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = attrs->qosclass; + } else { + t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = + _pthread_unspecified_priority(); + } + t->tsd[_PTHREAD_TSD_SLOT_PTR_MUNGE] = _pthread_ptr_munge_token; + t->tl_has_custom_stack = (attrs->stackaddr != NULL); + _PTHREAD_LOCK_INIT(t->lock); t->stackaddr = stackaddr; - t->stacksize = stacksize; + t->stackbottom = stackaddr - stacksize; t->freeaddr = freeaddr; t->freesize = freesize; - t->guardsize = attrs->guardsize; - t->detached = attrs->detached; + t->guardsize = _pthread_attr_guardsize(attrs); + t->tl_joinable = (attrs->detached == PTHREAD_CREATE_JOINABLE); t->inherit = attrs->inherit; - t->policy = attrs->policy; + t->tl_policy = attrs->policy; t->schedset = attrs->schedset; - t->param = attrs->param; + _pthread_attr_get_schedparam(attrs, &t->tl_param); t->cancel_state = PTHREAD_CANCEL_ENABLE | PTHREAD_CANCEL_DEFERRED; } +#pragma mark pthread public interface + /* Need to deprecate this in future */ int _pthread_is_threaded(void) @@ -818,7 +934,7 @@ mach_port_t pthread_mach_thread_np(pthread_t t) { mach_port_t kport = MACH_PORT_NULL; - (void)_pthread_is_valid(t, 0, &kport); + (void)_pthread_is_valid(t, &kport); return kport; } @@ -831,7 +947,7 @@ pthread_from_mach_thread_np(mach_port_t kernel_thread) /* No need to wait as mach port is already known */ _PTHREAD_LOCK(_pthread_list_lock); - TAILQ_FOREACH(p, &__pthread_head, plist) { + TAILQ_FOREACH(p, &__pthread_head, tl_plist) { if (_pthread_kernel_thread(p) == kernel_thread) { break; } @@ -847,6 +963,7 @@ size_t pthread_get_stacksize_np(pthread_t t) { size_t size = 0; + size_t stacksize = t->stackaddr - t->stackbottom; if (t == NULL) { return ESRCH; // XXX bug? @@ -863,7 +980,7 @@ pthread_get_stacksize_np(pthread_t t) // // Of course, on arm rlim_cur == rlim_max and there's only the one guard // page. So, we can skip all this there. 
- if (t == &_thread && t->stacksize + vm_page_size != t->freesize) { + if (t == main_thread() && stacksize + vm_page_size != t->freesize) { // We want to call getrlimit() just once, as it's relatively expensive static size_t rlimit_stack; @@ -877,55 +994,46 @@ pthread_get_stacksize_np(pthread_t t) } if (rlimit_stack == 0 || rlimit_stack > t->freesize) { - return t->stacksize; + return stacksize; } else { return rlimit_stack; } } #endif /* !defined(__arm__) && !defined(__arm64__) */ - if (t == pthread_self() || t == &_thread) { - return t->stacksize; + if (t == pthread_self() || t == main_thread()) { + size = stacksize; + goto out; } - _PTHREAD_LOCK(_pthread_list_lock); - - if (_pthread_is_valid_locked(t)) { - size = t->stacksize; + if (_pthread_validate_thread_and_list_lock(t)) { + size = stacksize; + _PTHREAD_UNLOCK(_pthread_list_lock); } else { size = ESRCH; // XXX bug? } - _PTHREAD_UNLOCK(_pthread_list_lock); - - return size; +out: + // binary compatibility issues force us to return + // DEFAULT_STACK_SIZE here when we do not know the size of the stack + return size ? size : DEFAULT_STACK_SIZE; } PTHREAD_NOEXPORT_VARIANT void * pthread_get_stackaddr_np(pthread_t t) { - void *addr = NULL; - - if (t == NULL) { - return (void *)(uintptr_t)ESRCH; // XXX bug? - } - // since the main thread will not get de-allocated from underneath us - if (t == pthread_self() || t == &_thread) { + if (t == pthread_self() || t == main_thread()) { return t->stackaddr; } - _PTHREAD_LOCK(_pthread_list_lock); - - if (_pthread_is_valid_locked(t)) { - addr = t->stackaddr; - } else { - addr = (void *)(uintptr_t)ESRCH; // XXX bug? + if (!_pthread_validate_thread_and_list_lock(t)) { + return (void *)(uintptr_t)ESRCH; // XXX bug? } + void *addr = t->stackaddr; _PTHREAD_UNLOCK(_pthread_list_lock); - return addr; } @@ -979,32 +1087,30 @@ _pthread_dealloc_special_reply_port(pthread_t t) { mach_port_t special_reply_port = _pthread_special_reply_port(t); if (special_reply_port != MACH_PORT_NULL) { - mach_port_mod_refs(mach_task_self(), special_reply_port, - MACH_PORT_RIGHT_RECEIVE, -1); + thread_destruct_special_reply_port(special_reply_port, + THREAD_SPECIAL_REPLY_PORT_ALL); } } pthread_t pthread_main_thread_np(void) { - return &_thread; + return main_thread(); } /* returns non-zero if the current thread is the main thread */ int pthread_main_np(void) { - pthread_t self = pthread_self(); - - return ((self->detached & _PTHREAD_CREATE_PARENT) == _PTHREAD_CREATE_PARENT); + return pthread_self() == main_thread(); } -/* if we are passed in a pthread_t that is NULL, then we return - the current thread's thread_id. So folks don't have to call - pthread_self, in addition to us doing it, if they just want - their thread_id. -*/ +/* + * if we are passed in a pthread_t that is NULL, then we return the current + * thread's thread_id. So folks don't have to call pthread_self, in addition to + * us doing it, if they just want their thread_id. 
+ */ PTHREAD_NOEXPORT_VARIANT int pthread_threadid_np(pthread_t thread, uint64_t *thread_id) @@ -1018,11 +1124,10 @@ pthread_threadid_np(pthread_t thread, uint64_t *thread_id) if (thread == NULL || thread == self) { *thread_id = self->thread_id; + } else if (!_pthread_validate_thread_and_list_lock(thread)) { + res = ESRCH; } else { - _PTHREAD_LOCK(_pthread_list_lock); - if (!_pthread_is_valid_locked(thread)) { - res = ESRCH; - } else if (thread->thread_id == 0) { + if (thread->thread_id == 0) { res = EINVAL; } else { *thread_id = thread->thread_id; @@ -1036,20 +1141,18 @@ PTHREAD_NOEXPORT_VARIANT int pthread_getname_np(pthread_t thread, char *threadname, size_t len) { - int res = 0; + if (thread == pthread_self()) { + strlcpy(threadname, thread->pthread_name, len); + return 0; + } - if (thread == NULL) { + if (!_pthread_validate_thread_and_list_lock(thread)) { return ESRCH; } - _PTHREAD_LOCK(_pthread_list_lock); - if (_pthread_is_valid_locked(thread)) { - strlcpy(threadname, thread->pthread_name, len); - } else { - res = ESRCH; - } + strlcpy(threadname, thread->pthread_name, len); _PTHREAD_UNLOCK(_pthread_list_lock); - return res; + return 0; } @@ -1079,219 +1182,122 @@ pthread_setname_np(const char *name) PTHREAD_ALWAYS_INLINE static inline void -__pthread_add_thread(pthread_t t, const pthread_attr_t *attrs, - bool parent, bool from_mach_thread) +__pthread_add_thread(pthread_t t, bool from_mach_thread) { - bool should_deallocate = false; - bool should_add = true; - - mach_port_t kport = _pthread_kernel_thread(t); - if (os_slowpath(!MACH_PORT_VALID(kport))) { - PTHREAD_CLIENT_CRASH(kport, - "Unable to allocate thread port, possible port leak"); - } - if (from_mach_thread) { _PTHREAD_LOCK_FROM_MACH_THREAD(_pthread_list_lock); } else { _PTHREAD_LOCK(_pthread_list_lock); } - // The parent and child threads race to add the thread to the list. - // When called by the parent: - // - set parentcheck to true - // - back off if childrun is true - // When called by the child: - // - set childrun to true - // - back off if parentcheck is true - if (parent) { - t->parentcheck = 1; - if (t->childrun) { - // child got here first, don't add. - should_add = false; - } + TAILQ_INSERT_TAIL(&__pthread_head, t, tl_plist); + _pthread_count++; - // If the child exits before we check in then it has to keep - // the thread structure memory alive so our dereferences above - // are valid. If it's a detached thread, then no joiner will - // deallocate the thread structure itself. So we do it here. - if (t->childexit) { - should_add = false; - should_deallocate = ((t->detached & PTHREAD_CREATE_DETACHED) == PTHREAD_CREATE_DETACHED); - } + if (from_mach_thread) { + _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock); } else { - t->childrun = 1; - if (t->parentcheck) { - // Parent got here first, don't add. - should_add = false; - } - if (t->wqthread) { - // Work queue threads have no parent. Simulate. - t->parentcheck = 1; - } + _PTHREAD_UNLOCK(_pthread_list_lock); } - if (should_add) { - TAILQ_INSERT_TAIL(&__pthread_head, t, plist); - _pthread_count++; - - /* - * Set some initial values which we know in the pthread structure in - * case folks try to get the values before the thread can set them. - */ - if (parent && attrs && attrs->schedset == 0) { - t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = attrs->qosclass; - } + if (!from_mach_thread) { + // PR-26275485: Mach threads will likely crash trying to run + // introspection code. 
Since the fall out from the introspection + // code not seeing the injected thread is likely less than crashing + // in the introspection code, just don't make the call. + _pthread_introspection_thread_create(t); } +} - if (from_mach_thread){ - _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock); +PTHREAD_ALWAYS_INLINE +static inline void +__pthread_undo_add_thread(pthread_t t, bool from_mach_thread) +{ + if (from_mach_thread) { + _PTHREAD_LOCK_FROM_MACH_THREAD(_pthread_list_lock); } else { - _PTHREAD_UNLOCK(_pthread_list_lock); + _PTHREAD_LOCK(_pthread_list_lock); } - if (parent) { - if (!from_mach_thread) { - // PR-26275485: Mach threads will likely crash trying to run - // introspection code. Since the fall out from the introspection - // code not seeing the injected thread is likely less than crashing - // in the introspection code, just don't make the call. - _pthread_introspection_thread_create(t, should_deallocate); - } - if (should_deallocate) { - _pthread_deallocate(t); - } + TAILQ_REMOVE(&__pthread_head, t, tl_plist); + _pthread_count--; + + if (from_mach_thread) { + _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock); } else { - _pthread_introspection_thread_start(t); + _PTHREAD_UNLOCK(_pthread_list_lock); } } -// must always inline this function to avoid epilogues -// Returns EBUSY if the thread structure should be kept alive (is joinable). -// Returns ESRCH if the thread structure is no longer valid (was detached). PTHREAD_ALWAYS_INLINE -static inline int -__pthread_remove_thread(pthread_t t, bool child, bool *should_exit) +static inline void +__pthread_started_thread(pthread_t t) { - int ret = 0; - - bool should_remove = true; - - _PTHREAD_LOCK(_pthread_list_lock); - - // When a thread removes itself: - // - Set the childexit flag indicating that the thread has exited. - // - Return false if parentcheck is zero (must keep structure) - // - If the thread is joinable, keep it on the list so that - // the join operation succeeds. Still decrement the running - // thread count so that we exit if no threads are running. - // - Update the running thread count. - // When another thread removes a joinable thread: - // - CAREFUL not to dereference the thread before verifying that the - // reference is still valid using _pthread_is_valid_locked(). - // - Remove the thread from the list. - - if (child) { - t->childexit = 1; - if (t->parentcheck == 0) { - ret = EBUSY; - } - if ((t->detached & PTHREAD_CREATE_JOINABLE) != 0) { - ret = EBUSY; - should_remove = false; - } - *should_exit = (--_pthread_count <= 0); - } else if (!_pthread_is_valid_locked(t)) { - ret = ESRCH; - should_remove = false; - } else if ((t->detached & PTHREAD_CREATE_JOINABLE) == 0) { - // If we found a thread but it's not joinable, bail. - ret = ESRCH; - should_remove = false; - } else if (t->parentcheck == 0) { - // If we're not the child thread *and* the parent has not finished - // creating the thread yet, then we are another thread that's joining - // and we cannot deallocate the pthread. 
- ret = EBUSY; - } - if (should_remove) { - TAILQ_REMOVE(&__pthread_head, t, plist); + mach_port_t kport = _pthread_kernel_thread(t); + if (os_slowpath(!MACH_PORT_VALID(kport))) { + PTHREAD_CLIENT_CRASH(kport, + "Unable to allocate thread port, possible port leak"); } - - _PTHREAD_UNLOCK(_pthread_list_lock); - - return ret; + _pthread_introspection_thread_start(t); } +#define _PTHREAD_CREATE_NONE 0x0 +#define _PTHREAD_CREATE_FROM_MACH_THREAD 0x1 +#define _PTHREAD_CREATE_SUSPENDED 0x2 + static int -_pthread_create(pthread_t *thread, - const pthread_attr_t *attr, - void *(*start_routine)(void *), - void *arg, - bool from_mach_thread) +_pthread_create(pthread_t *thread, const pthread_attr_t *attrs, + void *(*start_routine)(void *), void *arg, unsigned int create_flags) { pthread_t t = NULL; - unsigned int flags = 0; + void *stack = NULL; + bool from_mach_thread = (create_flags & _PTHREAD_CREATE_FROM_MACH_THREAD); - pthread_attr_t *attrs = (pthread_attr_t *)attr; if (attrs == NULL) { attrs = &_pthread_attr_default; } else if (attrs->sig != _PTHREAD_ATTR_SIG) { return EINVAL; } - if (attrs->detached == PTHREAD_CREATE_DETACHED) { - flags |= PTHREAD_START_DETACHED; - } - + unsigned int flags = PTHREAD_START_CUSTOM; if (attrs->schedset != 0) { + struct sched_param p; + _pthread_attr_get_schedparam(attrs, &p); flags |= PTHREAD_START_SETSCHED; flags |= ((attrs->policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT); - flags |= (attrs->param.sched_priority & PTHREAD_START_IMPORTANCE_MASK); + flags |= (p.sched_priority & PTHREAD_START_IMPORTANCE_MASK); } else if (attrs->qosclass != 0) { flags |= PTHREAD_START_QOSCLASS; flags |= (attrs->qosclass & PTHREAD_START_QOSCLASS_MASK); } + if (create_flags & _PTHREAD_CREATE_SUSPENDED) { + flags |= PTHREAD_START_SUSPENDED; + } __is_threaded = 1; - void *stack; - - if (attrs->fastpath) { - // kernel will allocate thread and stack, pass stacksize. - stack = (void *)attrs->stacksize; - } else { - // allocate the thread and its stack - flags |= PTHREAD_START_CUSTOM; - - int res; - res = _pthread_allocate(&t, attrs, &stack); - if (res) { - return res; - } - - t->arg = arg; - t->fun = start_routine; + t =_pthread_allocate(attrs, &stack); + if (t == NULL) { + return EAGAIN; } - pthread_t t2; - t2 = __bsdthread_create(start_routine, arg, stack, t, flags); - if (t2 == (pthread_t)-1) { + t->arg = arg; + t->fun = start_routine; + __pthread_add_thread(t, from_mach_thread); + + if (__bsdthread_create(start_routine, arg, stack, t, flags) == + (pthread_t)-1) { if (errno == EMFILE) { PTHREAD_CLIENT_CRASH(0, "Unable to allocate thread port, possible port leak"); } - if (flags & PTHREAD_START_CUSTOM) { - // free the thread and stack if we allocated it - _pthread_deallocate(t); - } + __pthread_undo_add_thread(t, from_mach_thread); + _pthread_deallocate(t, from_mach_thread); return EAGAIN; } - if (t == NULL) { - t = t2; - } - __pthread_add_thread(t, attrs, true, from_mach_thread); + if (create_flags & _PTHREAD_CREATE_SUSPENDED) { + _pthread_markcancel_if_canceled(t, _pthread_kernel_thread(t)); + } // n.b. 
if a thread is created detached and exits, t will be invalid *thread = t; @@ -1299,78 +1305,87 @@ _pthread_create(pthread_t *thread, } int -pthread_create(pthread_t *thread, - const pthread_attr_t *attr, - void *(*start_routine)(void *), - void *arg) +pthread_create(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) { - return _pthread_create(thread, attr, start_routine, arg, false); + unsigned int flags = _PTHREAD_CREATE_NONE; + return _pthread_create(thread, attr, start_routine, arg, flags); } int -pthread_create_from_mach_thread(pthread_t *thread, - const pthread_attr_t *attr, - void *(*start_routine)(void *), - void *arg) +pthread_create_from_mach_thread(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) { - return _pthread_create(thread, attr, start_routine, arg, true); + unsigned int flags = _PTHREAD_CREATE_FROM_MACH_THREAD; + return _pthread_create(thread, attr, start_routine, arg, flags); } +#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288 +/* Functions defined in machine-dependent files. */ +PTHREAD_NOEXPORT void _pthread_setup_suspended(pthread_t th, void (*f)(pthread_t), void *sp); + PTHREAD_NORETURN static void _pthread_suspended_body(pthread_t self) { _pthread_set_self(self); - __pthread_add_thread(self, NULL, false, false); + __pthread_started_thread(self); _pthread_exit(self, (self->fun)(self->arg)); } -int -pthread_create_suspended_np(pthread_t *thread, - const pthread_attr_t *attr, - void *(*start_routine)(void *), - void *arg) +static int +_pthread_create_suspended_np(pthread_t *thread, const pthread_attr_t *attrs, + void *(*start_routine)(void *), void *arg) { - int res; + pthread_t t; void *stack; mach_port_t kernel_thread = MACH_PORT_NULL; - const pthread_attr_t *attrs = attr; if (attrs == NULL) { attrs = &_pthread_attr_default; } else if (attrs->sig != _PTHREAD_ATTR_SIG) { return EINVAL; } - pthread_t t; - res = _pthread_allocate(&t, attrs, &stack); - if (res) { - return res; + t = _pthread_allocate(attrs, &stack); + if (t == NULL) { + return EAGAIN; } - *thread = t; - - kern_return_t kr; - kr = thread_create(mach_task_self(), &kernel_thread); - if (kr != KERN_SUCCESS) { - //PTHREAD_ABORT("thread_create() failed: %d", kern_res); - return EINVAL; /* Need better error here? */ + if (thread_create(mach_task_self(), &kernel_thread) != KERN_SUCCESS) { + _pthread_deallocate(t, false); + return EAGAIN; } _pthread_set_kernel_thread(t, kernel_thread); - (void)pthread_setschedparam_internal(t, kernel_thread, t->policy, &t->param); + (void)pthread_setschedparam_internal(t, kernel_thread, + t->tl_policy, &t->tl_param); __is_threaded = 1; t->arg = arg; t->fun = start_routine; - t->cancel_state |= _PTHREAD_CANCEL_INITIALIZED; - __pthread_add_thread(t, NULL, true, false); + __pthread_add_thread(t, false); // Set up a suspended thread. 
- _pthread_setup(t, _pthread_suspended_body, stack, 1, 0); - return res; + _pthread_setup_suspended(t, _pthread_suspended_body, stack); + *thread = t; + return 0; +} +#endif // !defined(__OPEN_SOURCE__) && TARGET_OS_OSX + +int +pthread_create_suspended_np(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) +{ +#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288 + if (_os_xbs_chrooted) { + return _pthread_create_suspended_np(thread, attr, start_routine, arg); + } +#endif + unsigned int flags = _PTHREAD_CREATE_SUSPENDED; + return _pthread_create(thread, attr, start_routine, arg, flags); } @@ -1379,33 +1394,31 @@ int pthread_detach(pthread_t thread) { int res = 0; - bool join = false; - semaphore_t sema = SEMAPHORE_NULL; + bool join = false, wake = false; - if (!_pthread_is_valid(thread, PTHREAD_IS_VALID_LOCK_THREAD, NULL)) { - return ESRCH; // Not a valid thread to detach. + if (!_pthread_validate_thread_and_list_lock(thread)) { + return ESRCH; } - if ((thread->detached & PTHREAD_CREATE_DETACHED) || - !(thread->detached & PTHREAD_CREATE_JOINABLE)) { + if (!thread->tl_joinable) { res = EINVAL; - } else if (thread->detached & _PTHREAD_EXITED) { + } else if (thread->tl_exit_gate == MACH_PORT_DEAD) { // Join the thread if it's already exited. join = true; } else { - thread->detached &= ~PTHREAD_CREATE_JOINABLE; - thread->detached |= PTHREAD_CREATE_DETACHED; - sema = thread->joiner_notify; + thread->tl_joinable = false; // _pthread_joiner_prepost_wake uses this + if (thread->tl_join_ctx) { + (void)_pthread_joiner_prepost_wake(thread); + wake = true; + } } - - _PTHREAD_UNLOCK(thread->lock); + _PTHREAD_UNLOCK(_pthread_list_lock); if (join) { pthread_join(thread, NULL); - } else if (sema) { - semaphore_signal(sema); + } else if (wake) { + _pthread_joiner_wake(thread); } - return res; } @@ -1418,7 +1431,7 @@ pthread_kill(pthread_t th, int sig) } mach_port_t kport = MACH_PORT_NULL; - if (!_pthread_is_valid(th, 0, &kport)) { + if (!_pthread_is_valid(th, &kport)) { return ESRCH; // Not a valid thread. } @@ -1452,18 +1465,19 @@ __pthread_workqueue_setkill(int enable) /* For compatibility... */ pthread_t -_pthread_self(void) { +_pthread_self(void) +{ return pthread_self(); } /* * Terminate a thread. 
*/ -int __disable_threadsignal(int); +extern int __disable_threadsignal(int); PTHREAD_NORETURN static void -_pthread_exit(pthread_t self, void *value_ptr) +_pthread_exit(pthread_t self, void *exit_value) { struct __darwin_pthread_handler_rec *handler; @@ -1471,7 +1485,7 @@ _pthread_exit(pthread_t self, void *value_ptr) __disable_threadsignal(1); // Set cancel state to disable and type to deferred - _pthread_setcancelstate_exit(self, value_ptr, __unix_conforming); + _pthread_setcancelstate_exit(self, exit_value); while ((handler = self->__cleanup_stack) != 0) { (handler->__routine)(handler->__arg); @@ -1479,71 +1493,44 @@ _pthread_exit(pthread_t self, void *value_ptr) } _pthread_tsd_cleanup(self); - _PTHREAD_LOCK(self->lock); - self->detached |= _PTHREAD_EXITED; - self->exit_value = value_ptr; - - if ((self->detached & PTHREAD_CREATE_JOINABLE) && - self->joiner_notify == SEMAPHORE_NULL) { - self->joiner_notify = (semaphore_t)os_get_cached_semaphore(); - } - _PTHREAD_UNLOCK(self->lock); - // Clear per-thread semaphore cache os_put_cached_semaphore(SEMAPHORE_NULL); - _pthread_terminate_invoke(self); + _pthread_terminate_invoke(self, exit_value); } void -pthread_exit(void *value_ptr) +pthread_exit(void *exit_value) { pthread_t self = pthread_self(); - if (self->wqthread == 0) { - _pthread_exit(self, value_ptr); - } else { - PTHREAD_ABORT("pthread_exit() may only be called against threads created via pthread_create()"); + if (os_unlikely(self->wqthread)) { + PTHREAD_CLIENT_CRASH(0, "pthread_exit() called from a thread " + "not created by pthread_create()"); } + _pthread_exit(self, exit_value); } PTHREAD_NOEXPORT_VARIANT int -pthread_getschedparam(pthread_t thread, - int *policy, - struct sched_param *param) +pthread_getschedparam(pthread_t thread, int *policy, struct sched_param *param) { - int ret = 0; - - if (thread == NULL) { + if (!_pthread_validate_thread_and_list_lock(thread)) { return ESRCH; } - _PTHREAD_LOCK(_pthread_list_lock); - - if (_pthread_is_valid_locked(thread)) { - if (policy) { - *policy = thread->policy; - } - if (param) { - *param = thread->param; - } - } else { - ret = ESRCH; - } - + if (policy) *policy = thread->tl_policy; + if (param) *param = thread->tl_param; _PTHREAD_UNLOCK(_pthread_list_lock); - - return ret; + return 0; } + PTHREAD_ALWAYS_INLINE static inline int -pthread_setschedparam_internal(pthread_t thread, - mach_port_t kport, - int policy, - const struct sched_param *param) +pthread_setschedparam_internal(pthread_t thread, mach_port_t kport, int policy, + const struct sched_param *param) { policy_base_data_t bases; policy_base_t base; @@ -1575,41 +1562,37 @@ pthread_setschedparam_internal(pthread_t thread, return (ret != KERN_SUCCESS) ? EINVAL : 0; } - PTHREAD_NOEXPORT_VARIANT int pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param) { mach_port_t kport = MACH_PORT_NULL; - int res; int bypass = 1; // since the main thread will not get de-allocated from underneath us - if (t == pthread_self() || t == &_thread) { + if (t == pthread_self() || t == main_thread()) { kport = _pthread_kernel_thread(t); } else { bypass = 0; - (void)_pthread_is_valid(t, 0, &kport); + if (!_pthread_is_valid(t, &kport)) { + return ESRCH; + } } - res = pthread_setschedparam_internal(t, kport, policy, param); - if (res == 0) { - if (bypass == 0) { - // Ensure the thread is still valid. 
- _PTHREAD_LOCK(_pthread_list_lock); - if (_pthread_is_valid_locked(t)) { - t->policy = policy; - t->param = *param; - } else { - res = ESRCH; - } - _PTHREAD_UNLOCK(_pthread_list_lock); - } else { - t->policy = policy; - t->param = *param; - } + int res = pthread_setschedparam_internal(t, kport, policy, param); + if (res) return res; + + if (bypass) { + _PTHREAD_LOCK(_pthread_list_lock); + } else if (!_pthread_validate_thread_and_list_lock(t)) { + // Ensure the thread is still valid. + return ESRCH; } - return res; + + t->tl_policy = policy; + t->tl_param = *param; + _PTHREAD_UNLOCK(_pthread_list_lock); + return 0; } @@ -1639,25 +1622,49 @@ PTHREAD_NOINLINE void _pthread_set_self(pthread_t p) { - return _pthread_set_self_internal(p, true); +#if VARIANT_DYLD + if (os_likely(!p)) { + return _pthread_set_self_dyld(); + } +#endif // VARIANT_DYLD + _pthread_set_self_internal(p, true); } -PTHREAD_ALWAYS_INLINE -static inline void -_pthread_set_self_internal(pthread_t p, bool needs_tsd_base_set) +#if VARIANT_DYLD +// _pthread_set_self_dyld is noinline+noexport to allow the option for +// static libsyscall to adopt this as the entry point from mach_init if +// desired +PTHREAD_NOINLINE PTHREAD_NOEXPORT +void +_pthread_set_self_dyld(void) { - if (p == NULL) { - p = &_thread; - } + pthread_t p = main_thread(); + p->thread_id = __thread_selfid(); - uint64_t tid = __thread_selfid(); - if (tid == -1ull) { - PTHREAD_ABORT("failed to set thread_id"); + if (os_unlikely(p->thread_id == -1ull)) { + PTHREAD_INTERNAL_CRASH(0, "failed to set thread_id"); } + // pthread self and the errno address are the + // bare minimium TSD setup that dyld needs to actually function. Without + // this, TSD access will fail and crash if it uses bits of Libc prior to + // library initialization. __pthread_init will finish the initialization + // during library init. 
p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = p; p->tsd[_PTHREAD_TSD_SLOT_ERRNO] = &p->err_no; - p->thread_id = tid; + _thread_set_tsd_base(&p->tsd[0]); +} +#endif // VARIANT_DYLD + +PTHREAD_ALWAYS_INLINE +static inline void +_pthread_set_self_internal(pthread_t p, bool needs_tsd_base_set) +{ + p->thread_id = __thread_selfid(); + + if (os_unlikely(p->thread_id == -1ull)) { + PTHREAD_INTERNAL_CRASH(0, "failed to set thread_id"); + } if (needs_tsd_base_set) { _thread_set_tsd_base(&p->tsd[0]); @@ -1719,6 +1726,32 @@ pthread_setconcurrency(int new_level) return 0; } +#if !defined(VARIANT_STATIC) +void * +malloc(size_t sz) +{ + if (_pthread_malloc) { + return _pthread_malloc(sz); + } else { + return NULL; + } +} + +void +free(void *p) +{ + if (_pthread_free) { + _pthread_free(p); + } +} +#endif // VARIANT_STATIC + +/* + * Perform package initialization - called automatically when application starts + */ +struct ProgramVars; /* forward reference */ + +#if !VARIANT_DYLD static unsigned long _pthread_strtoul(const char *p, const char **endptr, int base) { @@ -1777,36 +1810,29 @@ out: return ret; } -#if !defined(VARIANT_STATIC) -void * -malloc(size_t sz) +static void +parse_ptr_munge_params(const char *envp[], const char *apple[]) { - if (_pthread_malloc) { - return _pthread_malloc(sz); - } else { - return NULL; + const char *p, *s; + p = _simple_getenv(apple, "ptr_munge"); + if (p) { + _pthread_ptr_munge_token = _pthread_strtoul(p, &s, 16); + bzero((char *)p, strlen(p)); } -} - -void -free(void *p) -{ - if (_pthread_free) { - _pthread_free(p); +#if !DEBUG + if (_pthread_ptr_munge_token) return; +#endif + p = _simple_getenv(envp, "PTHREAD_PTR_MUNGE_TOKEN"); + if (p) { + uintptr_t t = _pthread_strtoul(p, &s, 16); + if (t) _pthread_ptr_munge_token = t; } } -#endif // VARIANT_STATIC - -/* - * Perform package initialization - called automatically when application starts - */ -struct ProgramVars; /* forward reference */ int __pthread_init(const struct _libpthread_functions *pthread_funcs, - const char *envp[] __unused, - const char *apple[], - const struct ProgramVars *vars __unused) + const char *envp[], const char *apple[], + const struct ProgramVars *vars __unused) { // Save our provided pushed-down functions if (pthread_funcs) { @@ -1829,11 +1855,11 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs, host_t host = mach_host_self(); kr = host_info(host, flavor, (host_info_t)&priority_info, &count); if (kr != KERN_SUCCESS) { - PTHREAD_ABORT("host_info(mach_host_self(), ...) failed: %s", mach_error_string(kr)); + PTHREAD_INTERNAL_CRASH(kr, "host_info() failed"); } else { - default_priority = priority_info.user_priority; - min_priority = priority_info.minimum_priority; - max_priority = priority_info.maximum_priority; + default_priority = (uint8_t)priority_info.user_priority; + min_priority = (uint8_t)priority_info.minimum_priority; + max_priority = (uint8_t)priority_info.maximum_priority; } mach_port_deallocate(mach_task_self(), host); @@ -1863,12 +1889,22 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs, allocsize = 0; } - pthread_t thread = &_thread; - pthread_attr_init(&_pthread_attr_default); + // Initialize random ptr_munge token from the kernel. + parse_ptr_munge_params(envp, apple); + + // libpthread.a in dyld "owns" the main thread structure itself and sets + // up the tsd to point to it. So take the pthread_self() from there + // and make it our main thread point. 
+ pthread_t thread = (pthread_t)_pthread_getspecific_direct( + _PTHREAD_TSD_SLOT_PTHREAD_SELF); + PTHREAD_ASSERT(thread); + _main_thread_ptr = thread; + + PTHREAD_ASSERT(_pthread_attr_default.qosclass == + _pthread_default_priority(0)); _pthread_struct_init(thread, &_pthread_attr_default, - stackaddr, stacksize, - allocaddr, allocsize); - thread->detached = PTHREAD_CREATE_JOINABLE; + stackaddr, stacksize, allocaddr, allocsize); + thread->tl_joinable = true; // Finish initialization with common code that is reinvoked on the // child side of a fork. @@ -1897,66 +1933,35 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs, return 0; } +#endif // !VARIANT_DYLD PTHREAD_NOEXPORT void _pthread_main_thread_init(pthread_t p) { TAILQ_INIT(&__pthread_head); _PTHREAD_LOCK_INIT(_pthread_list_lock); - - // Re-use the main thread's static storage if no thread was provided. - if (p == NULL) { - if (_thread.tsd[0] != 0) { - bzero(&_thread, sizeof(struct _pthread)); - } - p = &_thread; - } - _PTHREAD_LOCK_INIT(p->lock); _pthread_set_kernel_thread(p, mach_thread_self()); _pthread_set_reply_port(p, mach_reply_port()); p->__cleanup_stack = NULL; - p->joiner_notify = SEMAPHORE_NULL; - p->joiner = MACH_PORT_NULL; - p->detached |= _PTHREAD_CREATE_PARENT; + p->tl_join_ctx = NULL; + p->tl_exit_gate = MACH_PORT_NULL; p->tsd[__TSD_SEMAPHORE_CACHE] = (void*)SEMAPHORE_NULL; + p->tsd[__TSD_MACH_SPECIAL_REPLY] = 0; p->cancel_state |= _PTHREAD_CANCEL_INITIALIZED; // Initialize the list of threads with the new main thread. - TAILQ_INSERT_HEAD(&__pthread_head, p, plist); + TAILQ_INSERT_HEAD(&__pthread_head, p, tl_plist); _pthread_count = 1; - _pthread_set_self(p); _pthread_introspection_thread_start(p); } -int -_pthread_join_cleanup(pthread_t thread, void ** value_ptr, int conforming) -{ - int ret = __pthread_remove_thread(thread, false, NULL); - if (ret != 0 && ret != EBUSY) { - // Returns ESRCH if the thread was not created joinable. - return ret; - } - - if (value_ptr) { - *value_ptr = _pthread_get_exit_value(thread, conforming); - } - _pthread_introspection_thread_destroy(thread); - if (ret != EBUSY) { - // __pthread_remove_thread returns EBUSY if the parent has not - // finished creating the thread (and is still expecting the pthread_t - // to be alive). - _pthread_deallocate(thread); - } - return 0; -} - int sched_yield(void) { - swtch_pri(0); - return 0; + swtch_pri(0); + return 0; } // XXX remove @@ -1974,22 +1979,25 @@ pthread_yield_np(void) +// Libsystem knows about this symbol and exports it to libsyscall PTHREAD_NOEXPORT_VARIANT void _pthread_clear_qos_tsd(mach_port_t thread_port) { if (thread_port == MACH_PORT_NULL || (uintptr_t)_pthread_getspecific_direct(_PTHREAD_TSD_SLOT_MACH_THREAD_SELF) == thread_port) { /* Clear the current thread's TSD, that can be done inline. 
*/ - _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0)); + _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, + _pthread_unspecified_priority()); } else { pthread_t p; _PTHREAD_LOCK(_pthread_list_lock); - TAILQ_FOREACH(p, &__pthread_head, plist) { + TAILQ_FOREACH(p, &__pthread_head, tl_plist) { mach_port_t kp = _pthread_kernel_thread(p); if (thread_port == kp) { - p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); + p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = + _pthread_unspecified_priority(); break; } } @@ -1999,7 +2007,35 @@ _pthread_clear_qos_tsd(mach_port_t thread_port) } -/***** pthread workqueue support routines *****/ +#pragma mark pthread/stack_np.h public interface + + +#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__arm64__) +typedef uintptr_t frame_data_addr_t; + +struct frame_data { + frame_data_addr_t frame_addr_next; + frame_data_addr_t ret_addr; +}; +#else +#error ********** Unimplemented architecture +#endif + +uintptr_t +pthread_stack_frame_decode_np(uintptr_t frame_addr, uintptr_t *return_addr) +{ + struct frame_data *frame = (struct frame_data *)frame_addr; + + if (return_addr) { + *return_addr = (uintptr_t)frame->ret_addr; + } + + return (uintptr_t)frame->frame_addr_next; +} + + +#pragma mark pthread workqueue support routines + PTHREAD_NOEXPORT void _pthread_bsdthread_init(struct _pthread_registration_data *data) @@ -2011,19 +2047,18 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data) data->tsd_offset = offsetof(struct _pthread, tsd); data->mach_thread_self_offset = __TSD_MACH_THREAD_SELF * sizeof(void *); - int rv = __bsdthread_register(thread_start, - start_wqthread, (int)PTHREAD_SIZE, - (void*)data, (uintptr_t)sizeof(*data), - data->dispatch_queue_offset); + int rv = __bsdthread_register(thread_start, start_wqthread, (int)PTHREAD_SIZE, + (void*)data, (uintptr_t)sizeof(*data), data->dispatch_queue_offset); if (rv > 0) { - if ((rv & PTHREAD_FEATURE_QOS_DEFAULT) == 0) { - PTHREAD_INTERNAL_CRASH(rv, - "Missing required support for QOS_CLASS_DEFAULT"); - } - if ((rv & PTHREAD_FEATURE_QOS_MAINTENANCE) == 0) { - PTHREAD_INTERNAL_CRASH(rv, - "Missing required support for QOS_CLASS_MAINTENANCE"); + int required_features = + PTHREAD_FEATURE_FINEPRIO | + PTHREAD_FEATURE_BSDTHREADCTL | + PTHREAD_FEATURE_SETSELF | + PTHREAD_FEATURE_QOS_MAINTENANCE | + PTHREAD_FEATURE_QOS_DEFAULT; + if ((rv & required_features) != required_features) { + PTHREAD_INTERNAL_CRASH(rv, "Missing required kernel support"); } __pthread_supported_features = rv; } @@ -2039,9 +2074,13 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data) pthread_priority_t main_qos = (pthread_priority_t)data->main_qos; - if (_pthread_priority_get_qos_newest(main_qos) != QOS_CLASS_UNSPECIFIED) { + if (_pthread_priority_thread_qos(main_qos) != THREAD_QOS_UNSPECIFIED) { _pthread_set_main_qos(main_qos); - _thread.tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = main_qos; + main_thread()->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = main_qos; + } + + if (data->stack_addr_hint) { + __pthread_stack_hint = data->stack_addr_hint; } if (__libdispatch_workerfunction != NULL) { @@ -2050,191 +2089,188 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data) } } -// workqueue entry point from kernel -PTHREAD_NORETURN -void -_pthread_wqthread(pthread_t self, mach_port_t kport, void *stacklowaddr, void *keventlist, int flags, int 
nkevents) +PTHREAD_NOINLINE +static void +_pthread_wqthread_legacy_worker_wrap(pthread_priority_t pp) +{ + /* Old thread priorities are inverted from where we have them in + * the new flexible priority scheme. The highest priority is zero, + * up to 2, with background at 3. + */ + pthread_workqueue_function_t func = (pthread_workqueue_function_t)__libdispatch_workerfunction; + bool overcommit = (pp & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG); + int opts = overcommit ? WORKQ_ADDTHREADS_OPTION_OVERCOMMIT : 0; + + switch (_pthread_priority_thread_qos(pp)) { + case THREAD_QOS_USER_INITIATED: + return (*func)(WORKQ_HIGH_PRIOQUEUE, opts, NULL); + case THREAD_QOS_LEGACY: + /* B&I builders can't pass a QOS_CLASS_DEFAULT thread to dispatch, for fear of the QoS being + * picked up by NSThread (et al) and transported around the system. So change the TSD to + * make this thread look like QOS_CLASS_USER_INITIATED even though it will still run as legacy. + */ + _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, + _pthread_priority_make_from_thread_qos(THREAD_QOS_USER_INITIATED, 0, 0)); + return (*func)(WORKQ_DEFAULT_PRIOQUEUE, opts, NULL); + case THREAD_QOS_UTILITY: + return (*func)(WORKQ_LOW_PRIOQUEUE, opts, NULL); + case THREAD_QOS_BACKGROUND: + return (*func)(WORKQ_BG_PRIOQUEUE, opts, NULL); + } + PTHREAD_INTERNAL_CRASH(pp, "Invalid pthread priority for the legacy interface"); +} + +PTHREAD_ALWAYS_INLINE +static inline pthread_priority_t +_pthread_wqthread_priority(int flags) { - PTHREAD_ASSERT(flags & WQ_FLAG_THREAD_NEWSPI); + pthread_priority_t pp = 0; + thread_qos_t qos; + + if (flags & WQ_FLAG_THREAD_KEVENT) { + pp |= _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG; + } + if (flags & WQ_FLAG_THREAD_EVENT_MANAGER) { + return pp | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; + } - bool thread_reuse = flags & WQ_FLAG_THREAD_REUSE; - bool overcommit = flags & WQ_FLAG_THREAD_OVERCOMMIT; - bool kevent = flags & WQ_FLAG_THREAD_KEVENT; - bool workloop = (flags & WQ_FLAG_THREAD_WORKLOOP) && - __libdispatch_workloopfunction != NULL; - PTHREAD_ASSERT((!kevent) || (__libdispatch_keventfunction != NULL)); - PTHREAD_ASSERT(!workloop || kevent); + if (flags & WQ_FLAG_THREAD_OVERCOMMIT) { + pp |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; + } + if (flags & WQ_FLAG_THREAD_PRIO_QOS) { + qos = (thread_qos_t)(flags & WQ_FLAG_THREAD_PRIO_MASK); + pp = _pthread_priority_make_from_thread_qos(qos, 0, pp); + } else if (flags & WQ_FLAG_THREAD_PRIO_SCHED) { + pp |= _PTHREAD_PRIORITY_SCHED_PRI_MASK; + pp |= (flags & WQ_FLAG_THREAD_PRIO_MASK); + } else { + PTHREAD_INTERNAL_CRASH(flags, "Missing priority"); + } + return pp; +} - pthread_priority_t priority = 0; - unsigned long priority_flags = 0; +PTHREAD_NOINLINE +static void +_pthread_wqthread_setup(pthread_t self, mach_port_t kport, void *stacklowaddr, + int flags) +{ + void *stackaddr = self; + size_t stacksize = (uintptr_t)self - (uintptr_t)stacklowaddr; - if (overcommit) - priority_flags |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; - if (flags & WQ_FLAG_THREAD_EVENT_MANAGER) - priority_flags |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - if (kevent) - priority_flags |= _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG; + _pthread_struct_init(self, &_pthread_attr_default, stackaddr, stacksize, + PTHREAD_ALLOCADDR(stackaddr, stacksize), + PTHREAD_ALLOCSIZE(stackaddr, stacksize)); - int thread_class = flags & WQ_FLAG_THREAD_PRIOMASK; - priority = _pthread_priority_make_newest(thread_class, 0, priority_flags); + _pthread_set_kernel_thread(self, kport); + self->wqthread = 1; + self->wqkillset = 0; + self->tl_joinable = 
false; + self->cancel_state |= _PTHREAD_CANCEL_INITIALIZED; - if (!thread_reuse) { - // New thread created by kernel, needs initialization. - void *stackaddr = self; - size_t stacksize = (uintptr_t)self - (uintptr_t)stacklowaddr; + // Update the running thread count and set childrun bit. + bool thread_tsd_base_set = (bool)(flags & WQ_FLAG_THREAD_TSD_BASE_SET); + _pthread_set_self_internal(self, !thread_tsd_base_set); + __pthread_add_thread(self, false); + __pthread_started_thread(self); +} - _pthread_struct_init(self, &_pthread_attr_default, - stackaddr, stacksize, - PTHREAD_ALLOCADDR(stackaddr, stacksize), PTHREAD_ALLOCSIZE(stackaddr, stacksize)); +PTHREAD_NORETURN PTHREAD_NOINLINE +static void +_pthread_wqthread_exit(pthread_t self) +{ + pthread_priority_t pp; + thread_qos_t qos; - _pthread_set_kernel_thread(self, kport); - self->wqthread = 1; - self->wqkillset = 0; - self->cancel_state |= _PTHREAD_CANCEL_INITIALIZED; + pp = (pthread_priority_t)self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS]; + qos = _pthread_priority_thread_qos(pp); + if (qos == THREAD_QOS_UNSPECIFIED || qos > WORKQ_THREAD_QOS_CLEANUP) { + // Reset QoS to something low for the cleanup process + pp = _pthread_priority_make_from_thread_qos(WORKQ_THREAD_QOS_CLEANUP, 0, 0); + self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (void *)pp; + } - // Not a joinable thread. - self->detached &= ~PTHREAD_CREATE_JOINABLE; - self->detached |= PTHREAD_CREATE_DETACHED; + _pthread_exit(self, NULL); +} - // Update the running thread count and set childrun bit. - bool thread_tsd_base_set = (bool)(flags & WQ_FLAG_THREAD_TSD_BASE_SET); - _pthread_set_self_internal(self, !thread_tsd_base_set); - _pthread_introspection_thread_create(self, false); - __pthread_add_thread(self, NULL, false, false); +// workqueue entry point from kernel +void +_pthread_wqthread(pthread_t self, mach_port_t kport, void *stacklowaddr, + void *keventlist, int flags, int nkevents) +{ + if ((flags & WQ_FLAG_THREAD_REUSE) == 0) { + _pthread_wqthread_setup(self, kport, stacklowaddr, flags); } - // If we're running with fine-grained priority, we also need to - // set this thread to have the QoS class provided to use by the kernel - if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) { - _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(thread_class, 0, priority_flags)); + pthread_priority_t pp; + if (flags & WQ_FLAG_THREAD_OUTSIDEQOS) { + self->wqoutsideqos = 1; + pp = _pthread_priority_make_from_thread_qos(THREAD_QOS_LEGACY, 0, + _PTHREAD_PRIORITY_FALLBACK_FLAG); + } else { + self->wqoutsideqos = 0; + pp = _pthread_wqthread_priority(flags); } -#if WQ_DEBUG - PTHREAD_ASSERT(self); - PTHREAD_ASSERT(self == pthread_self()); -#endif // WQ_DEBUG + self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (void *)pp; - if (workloop) { + // avoid spills on the stack hard to keep used stack space minimal + if (nkevents == WORKQ_EXIT_THREAD_NKEVENT) { + goto exit; + } else if (flags & WQ_FLAG_THREAD_WORKLOOP) { self->fun = (void *(*)(void*))__libdispatch_workloopfunction; - } else if (kevent){ + self->wq_retop = WQOPS_THREAD_WORKLOOP_RETURN; + self->wq_kqid_ptr = ((kqueue_id_t *)keventlist - 1); + self->arg = keventlist; + self->wq_nevents = nkevents; + } else if (flags & WQ_FLAG_THREAD_KEVENT) { self->fun = (void *(*)(void*))__libdispatch_keventfunction; + self->wq_retop = WQOPS_THREAD_KEVENT_RETURN; + self->wq_kqid_ptr = NULL; + self->arg = keventlist; + self->wq_nevents = nkevents; } else { self->fun = (void 
*(*)(void*))__libdispatch_workerfunction; + self->wq_retop = WQOPS_THREAD_RETURN; + self->wq_kqid_ptr = NULL; + self->arg = (void *)(uintptr_t)pp; + self->wq_nevents = 0; + if (os_likely(__workq_newapi)) { + (*__libdispatch_workerfunction)(pp); + } else { + _pthread_wqthread_legacy_worker_wrap(pp); + } + goto just_return; } - self->arg = (void *)(uintptr_t)thread_class; - - if (kevent && keventlist && nkevents > 0){ - int errors_out; - kevent_errors_retry: - if (workloop) { - kqueue_id_t kevent_id = *(kqueue_id_t*)((char*)keventlist - sizeof(kqueue_id_t)); - kqueue_id_t kevent_id_in = kevent_id; - (__libdispatch_workloopfunction)(&kevent_id, &keventlist, &nkevents); - PTHREAD_ASSERT(kevent_id == kevent_id_in || nkevents == 0); - errors_out = __workq_kernreturn(WQOPS_THREAD_WORKLOOP_RETURN, keventlist, nkevents, 0); + if (nkevents > 0) { +kevent_errors_retry: + if (self->wq_retop == WQOPS_THREAD_WORKLOOP_RETURN) { + ((pthread_workqueue_function_workloop_t)self->fun) + (self->wq_kqid_ptr, &self->arg, &self->wq_nevents); } else { - (__libdispatch_keventfunction)(&keventlist, &nkevents); - errors_out = __workq_kernreturn(WQOPS_THREAD_KEVENT_RETURN, keventlist, nkevents, 0); + ((pthread_workqueue_function_kevent_t)self->fun) + (&self->arg, &self->wq_nevents); } - - if (errors_out > 0){ - nkevents = errors_out; + int rc = __workq_kernreturn(self->wq_retop, self->arg, self->wq_nevents, 0); + if (os_unlikely(rc > 0)) { + self->wq_nevents = rc; goto kevent_errors_retry; - } else if (errors_out < 0){ - PTHREAD_ABORT("kevent return produced an error: %d", errno); - } - goto thexit; - } else if (kevent){ - if (workloop) { - (__libdispatch_workloopfunction)(0, NULL, NULL); - __workq_kernreturn(WQOPS_THREAD_WORKLOOP_RETURN, NULL, 0, -1); - } else { - (__libdispatch_keventfunction)(NULL, NULL); - __workq_kernreturn(WQOPS_THREAD_KEVENT_RETURN, NULL, 0, 0); } - - goto thexit; - } - - if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) { - if (!__workq_newapi) { - /* Old thread priorities are inverted from where we have them in - * the new flexible priority scheme. The highest priority is zero, - * up to 2, with background at 3. - */ - pthread_workqueue_function_t func = (pthread_workqueue_function_t)__libdispatch_workerfunction; - - int opts = overcommit ? WORKQ_ADDTHREADS_OPTION_OVERCOMMIT : 0; - - if ((__pthread_supported_features & PTHREAD_FEATURE_QOS_DEFAULT) == 0) { - /* Dirty hack to support kernels that don't have QOS_CLASS_DEFAULT. */ - switch (thread_class) { - case QOS_CLASS_USER_INTERACTIVE: - thread_class = QOS_CLASS_USER_INITIATED; - break; - case QOS_CLASS_USER_INITIATED: - thread_class = QOS_CLASS_DEFAULT; - break; - default: - break; - } - } - - switch (thread_class) { - /* QOS_CLASS_USER_INTERACTIVE is not currently requested by for old dispatch priority compatibility */ - case QOS_CLASS_USER_INITIATED: - (*func)(WORKQ_HIGH_PRIOQUEUE, opts, NULL); - break; - - case QOS_CLASS_DEFAULT: - /* B&I builders can't pass a QOS_CLASS_DEFAULT thread to dispatch, for fear of the QoS being - * picked up by NSThread (et al) and transported around the system. So change the TSD to - * make this thread look like QOS_CLASS_USER_INITIATED even though it will still run as legacy. 
- */ - _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(QOS_CLASS_USER_INITIATED, 0, 0)); - (*func)(WORKQ_DEFAULT_PRIOQUEUE, opts, NULL); - break; - - case QOS_CLASS_UTILITY: - (*func)(WORKQ_LOW_PRIOQUEUE, opts, NULL); - break; - - case QOS_CLASS_BACKGROUND: - (*func)(WORKQ_BG_PRIOQUEUE, opts, NULL); - break; - - /* Legacy dispatch does not use QOS_CLASS_MAINTENANCE, so no need to handle it here */ - } - - } else { - /* "New" API, where dispatch is expecting to be given the thread priority */ - (*__libdispatch_workerfunction)(priority); + if (os_unlikely(rc < 0)) { + PTHREAD_INTERNAL_CRASH(self->err_no, "kevent (workloop) failed"); } } else { - /* We're the new library running on an old kext, so thread_class is really the workq priority. */ - pthread_workqueue_function_t func = (pthread_workqueue_function_t)__libdispatch_workerfunction; - int options = overcommit ? WORKQ_ADDTHREADS_OPTION_OVERCOMMIT : 0; - (*func)(thread_class, options, NULL); - } - - __workq_kernreturn(WQOPS_THREAD_RETURN, NULL, 0, 0); - -thexit: - { - pthread_priority_t current_priority = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS); - if ((current_priority & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) || - (_pthread_priority_get_qos_newest(current_priority) > WQ_THREAD_CLEANUP_QOS)) { - // Reset QoS to something low for the cleanup process - priority = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0); - _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority); - } +just_return: + __workq_kernreturn(self->wq_retop, NULL, 0, 0); } - _pthread_exit(self, NULL); +exit: + _pthread_wqthread_exit(self); } -/***** pthread workqueue API for libdispatch *****/ + +#pragma mark pthread workqueue API for libdispatch + _Static_assert(WORKQ_KEVENT_EVENT_BUFFER_LEN == WQ_KEVENT_LIST_LEN, "Kernel and userland should agree on the event list size"); @@ -2329,42 +2365,18 @@ pthread_workqueue_addthreads_np(int queue_priority, int options, int numthreads) } pthread_priority_t kp = 0; + int compat_priority = queue_priority & WQ_FLAG_THREAD_PRIO_MASK; + int flags = 0; - if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) { - /* The new kernel API takes the new QoS class + relative priority style of - * priority. This entry point is here for compatibility with old libdispatch - * versions (ie. the simulator). We request the corresponding new bracket - * from the kernel, then on the way out run all dispatch queues that were - * requested. - */ - - int compat_priority = queue_priority & WQ_FLAG_THREAD_PRIOMASK; - int flags = 0; - - /* To make sure the library does not issue more threads to dispatch than - * were requested, the total number of active requests is recorded in - * __workq_requests. - */ - if (options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) { - flags = _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; - } + if (options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) { + flags = _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; + } #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" - kp = _pthread_qos_class_encode_workqueue(compat_priority, flags); + kp = _pthread_qos_class_encode_workqueue(compat_priority, flags); #pragma clang diagnostic pop - } else { - /* Running on the old kernel, queue_priority is what we pass directly to - * the syscall. 
- */ - kp = queue_priority & WQ_FLAG_THREAD_PRIOMASK; - - if (options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) { - kp |= WORKQUEUE_OVERCOMMIT; - } - } - res = __workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, numthreads, (int)kp); if (res == -1) { res = errno; @@ -2391,9 +2403,17 @@ _pthread_workqueue_addthreads(int numthreads, pthread_priority_t priority) return EPERM; } - if ((__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) == 0) { - return ENOTSUP; - } +#if TARGET_OS_OSX + // Legacy simulators fail to boot + // + // Older sims set the deprecated _PTHREAD_PRIORITY_ROOTQUEUE_FLAG wrongly, + // which is aliased to _PTHREAD_PRIORITY_SCHED_PRI_FLAG and that XNU + // validates and rejects. + // + // As a workaround, forcefully unset this bit that cannot be set here + // anyway. + priority &= ~_PTHREAD_PRIORITY_SCHED_PRI_FLAG; +#endif res = __workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, numthreads, (int)priority); if (res == -1) { @@ -2412,9 +2432,62 @@ _pthread_workqueue_set_event_manager_priority(pthread_priority_t priority) return res; } -/* - * Introspection SPI for libpthread. - */ +int +_pthread_workloop_create(uint64_t workloop_id, uint64_t options, pthread_attr_t *attr) +{ + struct kqueue_workloop_params params = { + .kqwlp_version = sizeof(struct kqueue_workloop_params), + .kqwlp_id = workloop_id, + .kqwlp_flags = 0, + }; + + if (!attr) { + return EINVAL; + } + + if (attr->schedset) { + params.kqwlp_flags |= KQ_WORKLOOP_CREATE_SCHED_PRI; + params.kqwlp_sched_pri = attr->param.sched_priority; + } + + if (attr->policyset) { + params.kqwlp_flags |= KQ_WORKLOOP_CREATE_SCHED_POL; + params.kqwlp_sched_pol = attr->policy; + } + + if (attr->cpupercentset) { + params.kqwlp_flags |= KQ_WORKLOOP_CREATE_CPU_PERCENT; + params.kqwlp_cpu_percent = attr->cpupercent; + params.kqwlp_cpu_refillms = attr->refillms; + } + + int res = __kqueue_workloop_ctl(KQ_WORKLOOP_CREATE, 0, ¶ms, + sizeof(params)); + if (res == -1) { + res = errno; + } + return res; +} + +int +_pthread_workloop_destroy(uint64_t workloop_id) +{ + struct kqueue_workloop_params params = { + .kqwlp_version = sizeof(struct kqueue_workloop_params), + .kqwlp_id = workloop_id, + }; + + int res = __kqueue_workloop_ctl(KQ_WORKLOOP_DESTROY, 0, ¶ms, + sizeof(params)); + if (res == -1) { + res = errno; + } + return res; +} + + +#pragma mark Introspection SPI for libpthread. 
+ static pthread_introspection_hook_t _pthread_introspection_hook; @@ -2428,19 +2501,17 @@ pthread_introspection_hook_install(pthread_introspection_hook_t hook) PTHREAD_NOINLINE static void -_pthread_introspection_hook_callout_thread_create(pthread_t t, bool destroy) +_pthread_introspection_hook_callout_thread_create(pthread_t t) { _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_CREATE, t, t, PTHREAD_SIZE); - if (!destroy) return; - _pthread_introspection_thread_destroy(t); } static inline void -_pthread_introspection_thread_create(pthread_t t, bool destroy) +_pthread_introspection_thread_create(pthread_t t) { if (os_fastpath(!_pthread_introspection_hook)) return; - _pthread_introspection_hook_callout_thread_create(t, destroy); + _pthread_introspection_hook_callout_thread_create(t); } PTHREAD_NOINLINE @@ -2449,8 +2520,9 @@ _pthread_introspection_hook_callout_thread_start(pthread_t t) { size_t freesize; void *freeaddr; - if (t == &_thread) { - freesize = t->stacksize + t->guardsize; + if (t == main_thread()) { + size_t stacksize = t->stackaddr - t->stackbottom; + freesize = stacksize + t->guardsize; freeaddr = t->stackaddr - freesize; } else { freesize = t->freesize - PTHREAD_SIZE; @@ -2469,32 +2541,33 @@ _pthread_introspection_thread_start(pthread_t t) PTHREAD_NOINLINE static void -_pthread_introspection_hook_callout_thread_terminate(pthread_t t, - void *freeaddr, size_t freesize, bool destroy) +_pthread_introspection_hook_callout_thread_terminate(pthread_t t) { - if (destroy && freesize) { - freesize -= PTHREAD_SIZE; + size_t freesize; + void *freeaddr; + if (t == main_thread()) { + size_t stacksize = t->stackaddr - t->stackbottom; + freesize = stacksize + t->guardsize; + freeaddr = t->stackaddr - freesize; + } else { + freesize = t->freesize - PTHREAD_SIZE; + freeaddr = t->freeaddr; } _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_TERMINATE, t, freeaddr, freesize); - if (!destroy) return; - _pthread_introspection_thread_destroy(t); } static inline void -_pthread_introspection_thread_terminate(pthread_t t, void *freeaddr, - size_t freesize, bool destroy) +_pthread_introspection_thread_terminate(pthread_t t) { if (os_fastpath(!_pthread_introspection_hook)) return; - _pthread_introspection_hook_callout_thread_terminate(t, freeaddr, freesize, - destroy); + _pthread_introspection_hook_callout_thread_terminate(t); } PTHREAD_NOINLINE static void _pthread_introspection_hook_callout_thread_destroy(pthread_t t) { - if (t == &_thread) return; _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_DESTROY, t, t, PTHREAD_SIZE); } @@ -2506,3 +2579,37 @@ _pthread_introspection_thread_destroy(pthread_t t) _pthread_introspection_hook_callout_thread_destroy(t); } +#pragma mark libplatform shims + +#include + +// pthread_setup initializes large structures to 0, +// which the compiler turns into a library call to memset. 
+// +// To avoid linking against Libc, provide a simple wrapper +// that calls through to the libplatform primitives + +#undef memset +PTHREAD_NOEXPORT +void * +memset(void *b, int c, size_t len) +{ + return _platform_memset(b, c, len); +} + +#undef bzero +PTHREAD_NOEXPORT +void +bzero(void *s, size_t n) +{ + _platform_bzero(s, n); +} + +#undef memcpy +PTHREAD_NOEXPORT +void * +memcpy(void* a, const void* b, unsigned long s) +{ + return _platform_memmove(a, b, s); +} + diff --git a/src/pthread_asm.s b/src/pthread_asm.s index 8fc11c7..90afe46 100644 --- a/src/pthread_asm.s +++ b/src/pthread_asm.s @@ -21,6 +21,8 @@ * @APPLE_LICENSE_HEADER_END@ */ +#include "offsets.h" + #if defined(__x86_64__) #include @@ -49,6 +51,51 @@ _thread_start: leave ret + .align 2, 0x90 + .globl _thread_chkstk_darwin +_thread_chkstk_darwin: + .globl ____chkstk_darwin +____chkstk_darwin: // %rax == alloca size + pushq %rcx + leaq 0x10(%rsp), %rcx + + // validate that the frame pointer is on our stack (no alt stack) + cmpq %rcx, %gs:_PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET + jb Lprobe + cmpq %rcx, %gs:_PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET + jae Lprobe + + // validate alloca size + subq %rax, %rcx + jb Lcrash + cmpq %rcx, %gs:_PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET + ja Lcrash + + popq %rcx + retq + +Lprobe: + // probe the stack when it's not ours (altstack or some shenanigan) + cmpq $0x1000, %rax + jb Lend + pushq %rax +Lloop: + subq $0x1000, %rcx + testq %rcx, (%rcx) + subq $0x1000, %rax + cmpq $0x1000, %rax + ja Lloop + popq %rax +Lend: + subq %rax, %rcx + testq %rcx, (%rcx) + + popq %rcx + retq + +Lcrash: + ud2 + #endif #elif defined(__i386__) @@ -91,6 +138,56 @@ _thread_start: leave ret + .align 2, 0x90 + .globl _thread_chkstk_darwin +_thread_chkstk_darwin: + .globl ____chkstk_darwin +____chkstk_darwin: // %eax == alloca size + pushl %ecx + pushl %edx + leal 0xc(%esp), %ecx + + // validate that the frame pointer is on our stack (no alt stack) + movl %gs:0x0, %edx // pthread_self() + cmpl %ecx, _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET(%edx) + jb Lprobe + movl _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET(%edx), %edx + cmpl %ecx, %edx + jae Lprobe + + // validate alloca size + subl %eax, %ecx + jb Lcrash + cmpl %ecx, %edx + ja Lcrash + + popl %edx + popl %ecx + retl + +Lprobe: + // probe the stack when it's not ours (altstack or some shenanigan) + cmpl $0x1000, %eax + jb Lend + pushl %eax +Lloop: + subl $0x1000, %ecx + testl %ecx, (%ecx) + subl $0x1000, %eax + cmpl $0x1000, %eax + ja Lloop + popl %eax +Lend: + subl %eax, %ecx + testl %ecx, (%ecx) + + popl %edx + popl %ecx + retl + +Lcrash: + ud2 + #endif #elif defined(__arm__) diff --git a/src/pthread_cancelable.c b/src/pthread_cancelable.c index 894178c..8bb9c08 100644 --- a/src/pthread_cancelable.c +++ b/src/pthread_cancelable.c @@ -60,10 +60,10 @@ #include #include #include +#include #include #include -extern int __unix_conforming; extern int _pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime, @@ -73,16 +73,27 @@ extern int __sigwait(const sigset_t *set, int *sig); extern int __pthread_sigmask(int, const sigset_t *, sigset_t *); extern int __pthread_markcancel(mach_port_t); extern int __pthread_canceled(int); +extern int __semwait_signal_nocancel(int, int, int, int, __int64_t, __int32_t); -#ifdef VARIANT_CANCELABLE -extern int __semwait_signal(int cond_sem, int mutex_sem, int timeout, int relative, __int64_t tv_sec, __int32_t tv_nsec); -#else -extern int __semwait_signal(int cond_sem, int mutex_sem, int timeout, int 
relative, __int64_t tv_sec, __int32_t tv_nsec) __asm__("___semwait_signal_nocancel"); -#endif PTHREAD_NOEXPORT -int _pthread_join(pthread_t thread, void **value_ptr, int conforming, - int (*_semwait_signal)(int, int, int, int, __int64_t, __int32_t)); +int _pthread_join(pthread_t thread, void **value_ptr, int conforming); + +static inline int +_pthread_conformance(void) +{ +#if __DARWIN_UNIX03 + if (__unix_conforming == 0) + __unix_conforming = 1; +#ifdef VARIANT_CANCELABLE + return PTHREAD_CONFORM_UNIX03_CANCELABLE; +#else /* !VARIANT_CANCELABLE */ + return PTHREAD_CONFORM_UNIX03_NOCANCEL; +#endif +#else /* __DARWIN_UNIX03 */ + return PTHREAD_CONFORM_DARWIN_LEGACY; +#endif /* __DARWIN_UNIX03 */ +} #ifndef VARIANT_CANCELABLE @@ -111,7 +122,7 @@ pthread_cancel(pthread_t thread) __unix_conforming = 1; #endif /* __DARWIN_UNIX03 */ - if (!_pthread_is_valid(thread, 0, NULL)) { + if (!_pthread_is_valid(thread, NULL)) { return(ESRCH); } @@ -135,15 +146,7 @@ pthread_cancel(pthread_t thread) void pthread_testcancel(void) { - pthread_t self = pthread_self(); - -#if __DARWIN_UNIX03 - if (__unix_conforming == 0) - __unix_conforming = 1; - _pthread_testcancel(self, 1); -#else /* __DARWIN_UNIX03 */ - _pthread_testcancel(self, 0); -#endif /* __DARWIN_UNIX03 */ + _pthread_testcancel(_pthread_conformance()); } #ifndef BUILDING_VARIANT /* [ */ @@ -154,23 +157,32 @@ _pthread_exit_if_canceled(int error) { if (((error & 0xff) == EINTR) && __unix_conforming && (__pthread_canceled(0) == 0)) { pthread_t self = pthread_self(); - if (self != NULL) { - self->cancel_error = error; - } + + self->cancel_error = error; + self->canceled = true; pthread_exit(PTHREAD_CANCELED); } } -PTHREAD_NOEXPORT_VARIANT -void -_pthread_testcancel(pthread_t thread, int isconforming) +static inline bool +_pthread_is_canceled(pthread_t thread) { const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING); - int state = os_atomic_load2o(thread, cancel_state, seq_cst); - if ((state & flags) == flags) { - pthread_exit(isconforming ? PTHREAD_CANCELED : 0); + return (state & flags) == flags; +} + +PTHREAD_NOEXPORT_VARIANT +void +_pthread_testcancel(int isconforming) +{ + pthread_t self = pthread_self(); + if (_pthread_is_canceled(self)) { + // 4597450: begin + self->canceled = (isconforming != PTHREAD_CONFORM_DARWIN_LEGACY); + // 4597450: end + pthread_exit(isconforming ? 
PTHREAD_CANCELED : NULL); } } @@ -179,7 +191,6 @@ void _pthread_markcancel_if_canceled(pthread_t thread, mach_port_t kport) { const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING); - int state = os_atomic_or2o(thread, cancel_state, _PTHREAD_CANCEL_INITIALIZED, relaxed); if ((state & flags) == flags && __unix_conforming) { @@ -187,35 +198,14 @@ _pthread_markcancel_if_canceled(pthread_t thread, mach_port_t kport) } } -PTHREAD_NOEXPORT -void * -_pthread_get_exit_value(pthread_t thread, int conforming) -{ - const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING); - void *value = thread->exit_value; - - if (conforming) { - int state = os_atomic_load2o(thread, cancel_state, seq_cst); - if ((state & flags) == flags) { - value = PTHREAD_CANCELED; - } - } - return value; -} - /* When a thread exits set the cancellation state to DISABLE and DEFERRED */ PTHREAD_NOEXPORT void -_pthread_setcancelstate_exit(pthread_t thread, void *value_ptr, int conforming) +_pthread_setcancelstate_exit(pthread_t thread, void *value_ptr) { _pthread_update_cancel_state(thread, _PTHREAD_CANCEL_STATE_MASK | _PTHREAD_CANCEL_TYPE_MASK, PTHREAD_CANCEL_DISABLE | PTHREAD_CANCEL_DEFERRED); - if (value_ptr == PTHREAD_CANCELED) { - _PTHREAD_LOCK(thread->lock); - thread->detached |= _PTHREAD_WASCANCEL; // 4597450 - _PTHREAD_UNLOCK(thread->lock); - } } #endif /* !BUILDING_VARIANT ] */ @@ -227,30 +217,30 @@ PTHREAD_ALWAYS_INLINE static inline int _pthread_setcancelstate_internal(int state, int *oldstateptr, int conforming) { - pthread_t self; + pthread_t self = pthread_self(); switch (state) { - case PTHREAD_CANCEL_ENABLE: - if (conforming) { - __pthread_canceled(1); - } - break; - case PTHREAD_CANCEL_DISABLE: - if (conforming) { - __pthread_canceled(2); - } - break; - default: - return EINVAL; + case PTHREAD_CANCEL_ENABLE: + if (conforming) { + __pthread_canceled(1); + } + break; + case PTHREAD_CANCEL_DISABLE: + if (conforming) { + __pthread_canceled(2); + } + break; + default: + return EINVAL; } - self = pthread_self(); int oldstate = _pthread_update_cancel_state(self, _PTHREAD_CANCEL_STATE_MASK, state); if (oldstateptr) { *oldstateptr = oldstate & _PTHREAD_CANCEL_STATE_MASK; } if (!conforming) { - _pthread_testcancel(self, 0); /* See if we need to 'die' now... */ + /* See if we need to 'die' now... */ + _pthread_testcancel(PTHREAD_CONFORM_DARWIN_LEGACY); } return 0; } @@ -292,7 +282,8 @@ pthread_setcanceltype(int type, int *oldtype) *oldtype = oldstate & _PTHREAD_CANCEL_TYPE_MASK; } #if !__DARWIN_UNIX03 - _pthread_testcancel(self, 0); /* See if we need to 'die' now... */ + /* See if we need to 'die' now... 
*/ + _pthread_testcancel(PTHREAD_CONFORM_DARWIN_LEGACY); #endif /* __DARWIN_UNIX03 */ return (0); } @@ -315,76 +306,196 @@ pthread_sigmask(int how, const sigset_t * set, sigset_t * oset) #ifndef BUILDING_VARIANT /* [ */ -static void -__posix_join_cleanup(void *arg) +typedef struct pthread_join_context_s { + pthread_t waiter; + void **value_ptr; + mach_port_t kport; + semaphore_t custom_stack_sema; + bool detached; +} pthread_join_context_s, *pthread_join_context_t; + +static inline void * +_pthread_get_exit_value(pthread_t thread) { - pthread_t thread = (pthread_t)arg; + if (__unix_conforming && _pthread_is_canceled(thread)) { + return PTHREAD_CANCELED; + } + return thread->tl_exit_value; +} - _PTHREAD_LOCK(thread->lock); - /* leave another thread to join */ - thread->joiner = (struct _pthread *)NULL; - _PTHREAD_UNLOCK(thread->lock); +// called with _pthread_list_lock held +PTHREAD_NOEXPORT +semaphore_t +_pthread_joiner_prepost_wake(pthread_t thread) +{ + pthread_join_context_t ctx = thread->tl_join_ctx; + semaphore_t sema = MACH_PORT_NULL; + + if (thread->tl_joinable) { + sema = ctx->custom_stack_sema; + thread->tl_joinable = false; + } else { + ctx->detached = true; + thread->tl_join_ctx = NULL; + } + if (ctx->value_ptr) *ctx->value_ptr = _pthread_get_exit_value(thread); + return sema; +} + +static inline bool +_pthread_joiner_abort_wait(pthread_t thread, pthread_join_context_t ctx) +{ + bool aborted = false; + + _PTHREAD_LOCK(_pthread_list_lock); + if (!ctx->detached && thread->tl_exit_gate != MACH_PORT_DEAD) { + /* + * _pthread_joiner_prepost_wake() didn't happen + * allow another thread to join + */ +#if DEBUG + PTHREAD_ASSERT(thread->tl_join_ctx == ctx); +#endif + thread->tl_join_ctx = NULL; + thread->tl_exit_gate = MACH_PORT_NULL; + aborted = true; + } + _PTHREAD_UNLOCK(_pthread_list_lock); + return aborted; +} + +static int +_pthread_joiner_wait(pthread_t thread, pthread_join_context_t ctx, int conforming) +{ + uint32_t *exit_gate = &thread->tl_exit_gate; + int ulock_op = UL_UNFAIR_LOCK | ULF_NO_ERRNO; + + if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE) { + ulock_op |= ULF_WAIT_CANCEL_POINT; + } + + for (;;) { + uint32_t cur = os_atomic_load(exit_gate, acquire); + if (cur == MACH_PORT_DEAD) { + break; + } + if (os_unlikely(cur != ctx->kport)) { + PTHREAD_CLIENT_CRASH(cur, "pthread_join() state corruption"); + } + int ret = __ulock_wait(ulock_op, exit_gate, ctx->kport, 0); + switch (-ret) { + case 0: + case EFAULT: + break; + case EINTR: + /* + * POSIX says: + * + * As specified, either the pthread_join() call is canceled, or it + * succeeds, but not both. The difference is obvious to the + * application, since either a cancellation handler is run or + * pthread_join() returns. + * + * When __ulock_wait() returns EINTR, we check if we have been + * canceled, and if we have, we try to abort the wait. + * + * If we can't, it means the other thread finished the join while we + * were being canceled and commited the waiter to return from + * pthread_join(). Returning from the join then takes precedence + * over the cancelation which will be acted upon at the next + * cancelation point. 
+ */ + if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE && + _pthread_is_canceled(ctx->waiter)) { + if (_pthread_joiner_abort_wait(thread, ctx)) { + ctx->waiter->canceled = true; + pthread_exit(PTHREAD_CANCELED); + } + } + break; + } + } + + bool cleanup = false; + + _PTHREAD_LOCK(_pthread_list_lock); + // If pthread_detach() was called, we can't safely dereference the thread, + // else, decide who gets to deallocate the thread (see _pthread_terminate). + if (!ctx->detached) { +#if DEBUG + PTHREAD_ASSERT(thread->tl_join_ctx == ctx); +#endif + thread->tl_join_ctx = NULL; + cleanup = thread->tl_joiner_cleans_up; + } + _PTHREAD_UNLOCK(_pthread_list_lock); + + if (cleanup) { + _pthread_deallocate(thread, false); + } + return 0; } PTHREAD_NOEXPORT PTHREAD_NOINLINE int -_pthread_join(pthread_t thread, void **value_ptr, int conforming, - int (*_semwait_signal)(int, int, int, int, __int64_t, __int32_t)) +_pthread_join(pthread_t thread, void **value_ptr, int conforming) { - int res = 0; pthread_t self = pthread_self(); - kern_return_t kern_res; - semaphore_t joinsem, death = (semaphore_t)os_get_cached_semaphore(); + pthread_join_context_s ctx = { + .waiter = self, + .value_ptr = value_ptr, + .kport = MACH_PORT_NULL, + .custom_stack_sema = MACH_PORT_NULL, + }; + int res = 0; + kern_return_t kr; - if (!_pthread_is_valid(thread, PTHREAD_IS_VALID_LOCK_THREAD, NULL)) { - res = ESRCH; - goto out; + if (!_pthread_validate_thread_and_list_lock(thread)) { + return ESRCH; } - if (thread->sig != _PTHREAD_SIG) { - res = ESRCH; - } else if ((thread->detached & PTHREAD_CREATE_DETACHED) || - !(thread->detached & PTHREAD_CREATE_JOINABLE) || - (thread->joiner != NULL)) { + if (!thread->tl_joinable || (thread->tl_join_ctx != NULL)) { res = EINVAL; - } else if (thread == self || (self != NULL && self->joiner == thread)) { + } else if (thread == self || + (self->tl_join_ctx && self->tl_join_ctx->waiter == thread)) { res = EDEADLK; + } else if (thread->tl_exit_gate == MACH_PORT_DEAD) { + TAILQ_REMOVE(&__pthread_head, thread, tl_plist); +#if DEBUG + PTHREAD_ASSERT(thread->tl_joiner_cleans_up); +#endif + thread->tl_joinable = false; + if (value_ptr) *value_ptr = _pthread_get_exit_value(thread); + } else { + ctx.kport = _pthread_kernel_thread(thread); + thread->tl_exit_gate = ctx.kport; + thread->tl_join_ctx = &ctx; + if (thread->tl_has_custom_stack) { + ctx.custom_stack_sema = (semaphore_t)os_get_cached_semaphore(); + } } - if (res != 0) { - _PTHREAD_UNLOCK(thread->lock); - goto out; - } + _PTHREAD_UNLOCK(_pthread_list_lock); - joinsem = thread->joiner_notify; - if (joinsem == SEMAPHORE_NULL) { - thread->joiner_notify = joinsem = death; - death = MACH_PORT_NULL; + if (res == 0) { + if (ctx.kport == MACH_PORT_NULL) { + _pthread_deallocate(thread, false); + } else { + res = _pthread_joiner_wait(thread, &ctx, conforming); + } } - thread->joiner = self; - _PTHREAD_UNLOCK(thread->lock); - - if (conforming) { - /* Wait for it to signal... */ - pthread_cleanup_push(__posix_join_cleanup, (void *)thread); - do { - res = _semwait_signal(joinsem, 0, 0, 0, 0, 0); - } while ((res < 0) && (errno == EINTR)); - pthread_cleanup_pop(0); - } else { - /* Wait for it to signal... */ - kern_return_t (*_semaphore_wait)(semaphore_t) = - (void*)_semwait_signal; + if (res == 0 && ctx.custom_stack_sema && !ctx.detached) { + // threads with a custom stack need to make sure _pthread_terminate + // returned before the joiner is unblocked, the joiner may quickly + // deallocate the stack with rather dire consequences. 
+ // + // When we reach this point we know the pthread_join has to succeed + // so this can't be a cancelation point. do { - kern_res = _semaphore_wait(joinsem); - } while (kern_res != KERN_SUCCESS); + kr = __semwait_signal_nocancel(ctx.custom_stack_sema, 0, 0, 0, 0, 0); + } while (kr != KERN_SUCCESS); } - - os_put_cached_semaphore((os_semaphore_t)joinsem); - res = _pthread_join_cleanup(thread, value_ptr, conforming); - -out: - if (death) { - os_put_cached_semaphore(death); + if (ctx.custom_stack_sema) { + os_put_cached_semaphore(ctx.custom_stack_sema); } return res; } @@ -398,82 +509,45 @@ out: int pthread_join(pthread_t thread, void **value_ptr) { -#if __DARWIN_UNIX03 - if (__unix_conforming == 0) - __unix_conforming = 1; - -#ifdef VARIANT_CANCELABLE - _pthread_testcancel(pthread_self(), 1); -#endif /* VARIANT_CANCELABLE */ - return _pthread_join(thread, value_ptr, 1, __semwait_signal); -#else - return _pthread_join(thread, value_ptr, 0, (void*)semaphore_wait); -#endif /* __DARWIN_UNIX03 */ - + int conforming = _pthread_conformance(); + if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE) { + _pthread_testcancel(conforming); + } + return _pthread_join(thread, value_ptr, conforming); } int -pthread_cond_wait(pthread_cond_t *cond, - pthread_mutex_t *mutex) +pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { - int conforming; -#if __DARWIN_UNIX03 - - if (__unix_conforming == 0) - __unix_conforming = 1; - -#ifdef VARIANT_CANCELABLE - conforming = 1; -#else /* !VARIANT_CANCELABLE */ - conforming = -1; -#endif /* VARIANT_CANCELABLE */ -#else /* __DARWIN_UNIX03 */ - conforming = 0; -#endif /* __DARWIN_UNIX03 */ - return (_pthread_cond_wait(cond, mutex, (struct timespec *)NULL, 0, conforming)); + return _pthread_cond_wait(cond, mutex, NULL, 0, _pthread_conformance()); } int -pthread_cond_timedwait(pthread_cond_t *cond, - pthread_mutex_t *mutex, - const struct timespec *abstime) +pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, + const struct timespec *abstime) { - int conforming; -#if __DARWIN_UNIX03 - if (__unix_conforming == 0) - __unix_conforming = 1; - -#ifdef VARIANT_CANCELABLE - conforming = 1; -#else /* !VARIANT_CANCELABLE */ - conforming = -1; -#endif /* VARIANT_CANCELABLE */ -#else /* __DARWIN_UNIX03 */ - conforming = 0; -#endif /* __DARWIN_UNIX03 */ - - return (_pthread_cond_wait(cond, mutex, abstime, 0, conforming)); + return _pthread_cond_wait(cond, mutex, abstime, 0, _pthread_conformance()); } int sigwait(const sigset_t * set, int * sig) { #if __DARWIN_UNIX03 - int err = 0; + int err = 0, conformance = _pthread_conformance(); if (__unix_conforming == 0) __unix_conforming = 1; -#ifdef VARIANT_CANCELABLE - _pthread_testcancel(pthread_self(), 1); -#endif /* VARIANT_CANCELABLE */ + if (conformance == PTHREAD_CONFORM_UNIX03_CANCELABLE) { + _pthread_testcancel(conformance); + } if (__sigwait(set, sig) == -1) { err = errno; -#ifdef VARIANT_CANCELABLE - _pthread_testcancel(pthread_self(), 1); -#endif /* VARIANT_CANCELABLE */ + if (conformance == PTHREAD_CONFORM_UNIX03_CANCELABLE) { + _pthread_testcancel(conformance); + } /* * EINTR that isn't a result of pthread_cancel() diff --git a/src/pthread_cond.c b/src/pthread_cond.c index be55e1d..79e38ba 100644 --- a/src/pthread_cond.c +++ b/src/pthread_cond.c @@ -59,7 +59,6 @@ #endif /* PLOCKSTAT */ extern int __gettimeofday(struct timeval *, struct timezone *); -extern void _pthread_testcancel(pthread_t thread, int isconforming); PTHREAD_NOEXPORT int _pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t 
*mutex, @@ -88,8 +87,8 @@ COND_GETSEQ_ADDR(_pthread_cond *cond, #ifndef BUILDING_VARIANT /* [ */ static void _pthread_cond_cleanup(void *arg); -static void _pthread_cond_updateval(_pthread_cond * cond, int error, - uint32_t updateval); +static void _pthread_cond_updateval(_pthread_cond *cond, _pthread_mutex *mutex, + int error, uint32_t updateval); int @@ -401,7 +400,7 @@ _pthread_cond_signal(pthread_cond_t *ocond, bool broadcast, mach_port_t thread) } if (updateval != (uint32_t)-1 && updateval != 0) { - _pthread_cond_updateval(cond, 0, updateval); + _pthread_cond_updateval(cond, NULL, 0, updateval); } return 0; @@ -449,8 +448,8 @@ pthread_cond_signal(pthread_cond_t *ocond) * Suspend waiting for a condition variable. * Note: we have to keep a list of condition variables which are using * this same mutex variable so we can detect invalid 'destroy' sequences. - * If isconforming < 0, we skip the _pthread_testcancel(), but keep the - * remaining conforming behavior.. + * If conformance is not cancelable, we skip the _pthread_testcancel(), + * but keep the remaining conforming behavior.. */ PTHREAD_NOEXPORT PTHREAD_NOINLINE int @@ -458,7 +457,7 @@ _pthread_cond_wait(pthread_cond_t *ocond, pthread_mutex_t *omutex, const struct timespec *abstime, int isRelative, - int isconforming) + int conforming) { int res; _pthread_cond *cond = (_pthread_cond *)ocond; @@ -477,13 +476,13 @@ _pthread_cond_wait(pthread_cond_t *ocond, return res; } - if (isconforming) { + if (conforming) { if (!_pthread_mutex_check_signature(mutex) && !_pthread_mutex_check_signature_init(mutex)) { return EINVAL; } - if (isconforming > 0) { - _pthread_testcancel(pthread_self(), 1); + if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE) { + _pthread_testcancel(conforming); } } @@ -505,7 +504,7 @@ _pthread_cond_wait(pthread_cond_t *ocond, if (then.tv_sec < 0 || (then.tv_sec == 0 && then.tv_nsec == 0)) { return ETIMEDOUT; } - if (isconforming && + if (conforming && (abstime->tv_sec < 0 || abstime->tv_nsec < 0 || abstime->tv_nsec >= NSEC_PER_SEC)) { @@ -518,7 +517,7 @@ _pthread_cond_wait(pthread_cond_t *ocond, return ETIMEDOUT; } } - if (isconforming && (then.tv_sec < 0 || then.tv_nsec < 0)) { + if (conforming && (then.tv_sec < 0 || then.tv_nsec < 0)) { return EINVAL; } if (then.tv_nsec >= NSEC_PER_SEC) { @@ -567,10 +566,10 @@ _pthread_cond_wait(pthread_cond_t *ocond, cvlsgen = ((uint64_t)(ulval | savebits)<< 32) | nlval; // SUSv3 requires pthread_cond_wait to be a cancellation point - if (isconforming) { + if (conforming) { pthread_cleanup_push(_pthread_cond_cleanup, (void *)cond); updateval = __psynch_cvwait(ocond, cvlsgen, ucntval, (pthread_mutex_t *)npmtx, mugen, flags, (int64_t)then.tv_sec, (int32_t)then.tv_nsec); - _pthread_testcancel(pthread_self(), isconforming); + _pthread_testcancel(conforming); pthread_cleanup_pop(0); } else { updateval = __psynch_cvwait(ocond, cvlsgen, ucntval, (pthread_mutex_t *)npmtx, mugen, flags, (int64_t)then.tv_sec, (int32_t)then.tv_nsec); @@ -592,12 +591,12 @@ _pthread_cond_wait(pthread_cond_t *ocond, } // add unlock ref to show one less waiter - _pthread_cond_updateval(cond, err, 0); + _pthread_cond_updateval(cond, mutex, err, 0); } else if (updateval != 0) { // Successful wait // The return due to prepost and might have bit states // update S and return for prepo if needed - _pthread_cond_updateval(cond, 0, updateval); + _pthread_cond_updateval(cond, mutex, 0, updateval); } pthread_mutex_lock(omutex); @@ -609,25 +608,20 @@ static void _pthread_cond_cleanup(void *arg) { _pthread_cond *cond = 
(_pthread_cond *)arg; + pthread_t thread = pthread_self(); pthread_mutex_t *mutex; // 4597450: begin - pthread_t thread = pthread_self(); - int thcanceled = 0; - - _PTHREAD_LOCK(thread->lock); - thcanceled = (thread->detached & _PTHREAD_WASCANCEL); - _PTHREAD_UNLOCK(thread->lock); - - if (thcanceled == 0) { + if (!thread->canceled) { return; } - // 4597450: end + mutex = (pthread_mutex_t *)cond->busy; // add unlock ref to show one less waiter - _pthread_cond_updateval(cond, thread->cancel_error, 0); + _pthread_cond_updateval(cond, (_pthread_mutex *)mutex, + thread->cancel_error, 0); /* ** Can't do anything if this fails -- we're on the way out @@ -637,11 +631,9 @@ _pthread_cond_cleanup(void *arg) } } -#define ECVCERORR 256 -#define ECVPERORR 512 - static void -_pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval) +_pthread_cond_updateval(_pthread_cond *cond, _pthread_mutex *mutex, + int error, uint32_t updateval) { int needclearpre; @@ -653,10 +645,10 @@ _pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval) if (error != 0) { updateval = PTHRW_INC; - if ((error & ECVCERORR) != 0) { + if (error & ECVCLEARED) { updateval |= PTH_RWS_CV_CBIT; } - if ((error & ECVPERORR) != 0) { + if (error & ECVPREPOST) { updateval |= PTH_RWS_CV_PBIT; } } @@ -675,7 +667,10 @@ _pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval) oldval64 = (((uint64_t)scntval) << 32); oldval64 |= lcntval; - if (diffgen <= 0) { + PTHREAD_TRACE(psynch_cvar_updateval | DBG_FUNC_START, cond, oldval64, + updateval, 0); + + if (diffgen <= 0 && !is_rws_pbit_set(updateval)) { /* TBD: Assert, should not be the case */ /* validate it is spurious and return */ newval64 = oldval64; @@ -700,19 +695,22 @@ _pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval) } } while (!os_atomic_cmpxchg(c_lsseqaddr, oldval64, newval64, seq_cst)); + PTHREAD_TRACE(psynch_cvar_updateval | DBG_FUNC_END, cond, newval64, + (uint64_t)diffgen << 32 | needclearpre, 0); + if (diffgen > 0) { // if L == S, then reset associated mutex if ((nsval & PTHRW_COUNT_MASK) == (lcntval & PTHRW_COUNT_MASK)) { cond->busy = NULL; } + } - if (needclearpre != 0) { - uint32_t flags = 0; - if (cond->pshared == PTHREAD_PROCESS_SHARED) { - flags |= _PTHREAD_MTX_OPT_PSHARED; - } - (void)__psynch_cvclrprepost(cond, lcntval, ucntval, nsval, 0, lcntval, flags); + if (needclearpre) { + uint32_t flags = 0; + if (cond->pshared == PTHREAD_PROCESS_SHARED) { + flags |= _PTHREAD_MTX_OPT_PSHARED; } + (void)__psynch_cvclrprepost(cond, lcntval, ucntval, nsval, 0, lcntval, flags); } } diff --git a/src/pthread_dependency.c b/src/pthread_dependency.c new file mode 100644 index 0000000..282dfc3 --- /dev/null +++ b/src/pthread_dependency.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include "resolver.h" +#include "internal.h" +#include "dependency_private.h" +#include + +#define PREREQUISITE_FULFILLED (~0u) + +PTHREAD_NOEXPORT +void _pthread_dependency_fulfill_slow(pthread_dependency_t *pr, uint32_t old); + +OS_ALWAYS_INLINE +static inline mach_port_t +_pthread_dependency_self(void) +{ + void *v = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_MACH_THREAD_SELF); + return (mach_port_t)(uintptr_t)v; +} + +void +pthread_dependency_init_np(pthread_dependency_t *pr, pthread_t pth, + pthread_dependency_attr_t *attrs) +{ + if (attrs) *(volatile char *)attrs; + *pr = (pthread_dependency_t)PTHREAD_DEPENDENCY_INITIALIZER_NP(pth); +} + +OS_NOINLINE +void +_pthread_dependency_fulfill_slow(pthread_dependency_t *pr, uint32_t old) +{ + if (old == PREREQUISITE_FULFILLED) { + PTHREAD_CLIENT_CRASH(0, "Fufilling pthread_dependency_t twice"); + } + if (os_unlikely(old != _pthread_dependency_self())) { + PTHREAD_CLIENT_CRASH(old, "Fulfilled a dependency " + "not owned by current thread"); + } + + int ret = __ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &pr->__pdep_opaque1, 0); + switch (-ret) { + case 0: + case ENOENT: + return; + default: + PTHREAD_INTERNAL_CRASH(-ret, "__ulock_wake() failed"); + } +} + + +void +pthread_dependency_fulfill_np(pthread_dependency_t *pr, void *value) +{ + uint32_t old; + + pr->__pdep_opaque2 = (uint64_t)(uintptr_t)value; + old = os_atomic_xchg(&pr->__pdep_opaque1, PREREQUISITE_FULFILLED, release); + + if (old != 0) _pthread_dependency_fulfill_slow(pr, old); +} + +void * +pthread_dependency_wait_np(pthread_dependency_t *pr) +{ + if (os_atomic_cmpxchg(&pr->__pdep_opaque1, 0, pr->__pdep_owner, relaxed)) { + int ret; + again: + ret = __ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &pr->__pdep_opaque1, + pr->__pdep_owner, 0); + switch (-ret) { + case EFAULT: + if (pr->__pdep_opaque1 == pr->__pdep_owner) goto again; + case 0: + break; + case EOWNERDEAD: + PTHREAD_CLIENT_CRASH(pr->__pdep_owner, "Waiting on orphaned dependency"); + default: + PTHREAD_CLIENT_CRASH(-ret, "__ulock_wait() failed"); + } + } + + uint32_t cur = os_atomic_load(&pr->__pdep_opaque1, acquire); + if (cur == PREREQUISITE_FULFILLED) { + return (void *)(uintptr_t)pr->__pdep_opaque2; + } + PTHREAD_CLIENT_CRASH(cur, "Corrupted pthread_dependency_t"); +} + diff --git a/src/pthread_mutex.c b/src/pthread_mutex.c index a68503c..edc97ee 100644 --- a/src/pthread_mutex.c +++ b/src/pthread_mutex.c @@ -54,8 +54,6 @@ #include "internal.h" #include "kern/kern_trace.h" -extern int __unix_conforming; - #ifndef BUILDING_VARIANT /* [ */ #ifdef PLOCKSTAT @@ -85,31 +83,73 @@ _plockstat_never_fired(void) #define PTHREAD_MUTEX_INIT_UNUSED 1 +PTHREAD_NOEXPORT PTHREAD_WEAK +int _pthread_mutex_lock_init_slow(_pthread_mutex *mutex, bool trylock); + +PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers +int _pthread_mutex_fairshare_lock_slow(_pthread_mutex *mutex, bool trylock); + PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers -int 
_pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock); +int _pthread_mutex_firstfit_lock_slow(_pthread_mutex *mutex, bool trylock); PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers -int _pthread_mutex_unlock_slow(pthread_mutex_t *omutex); +int _pthread_mutex_fairshare_unlock_slow(_pthread_mutex *mutex); + +PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers +int _pthread_mutex_firstfit_unlock_slow(_pthread_mutex *mutex); PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers int _pthread_mutex_corruption_abort(_pthread_mutex *mutex); -extern int __pthread_mutex_default_policy PTHREAD_NOEXPORT; +extern int __pthread_mutex_default_opt_policy PTHREAD_NOEXPORT; + + +int __pthread_mutex_default_opt_policy PTHREAD_NOEXPORT = + _PTHREAD_MTX_OPT_POLICY_DEFAULT; +static inline bool +_pthread_mutex_policy_validate(int policy) +{ + return (policy >= 0 && policy < _PTHREAD_MUTEX_POLICY_LAST); +} -int __pthread_mutex_default_policy PTHREAD_NOEXPORT = - _PTHREAD_MUTEX_POLICY_FAIRSHARE; +static inline int +_pthread_mutex_policy_to_opt(int policy) +{ + switch (policy) { + case PTHREAD_MUTEX_POLICY_FAIRSHARE_NP: + return _PTHREAD_MTX_OPT_POLICY_FAIRSHARE; + case PTHREAD_MUTEX_POLICY_FIRSTFIT_NP: + return _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; + default: + __builtin_unreachable(); + } +} PTHREAD_NOEXPORT void _pthread_mutex_global_init(const char *envp[], struct _pthread_registration_data *registration_data) { + + int opt = _PTHREAD_MTX_OPT_POLICY_DEFAULT; + if (registration_data->mutex_default_policy) { + int policy = registration_data->mutex_default_policy; + if (_pthread_mutex_policy_validate(policy)) { + opt = _pthread_mutex_policy_to_opt(policy); + } + } + const char *envvar = _simple_getenv(envp, "PTHREAD_MUTEX_DEFAULT_POLICY"); - if ((envvar && (envvar[0] - '0') == _PTHREAD_MUTEX_POLICY_FIRSTFIT) || - (registration_data->mutex_default_policy == - _PTHREAD_MUTEX_POLICY_FIRSTFIT)) { - __pthread_mutex_default_policy = _PTHREAD_MUTEX_POLICY_FIRSTFIT; + if (envvar) { + int policy = envvar[0] - '0'; + if (_pthread_mutex_policy_validate(policy)) { + opt = _pthread_mutex_policy_to_opt(policy); + } + } + + if (opt != __pthread_mutex_default_opt_policy) { + __pthread_mutex_default_opt_policy = opt; } } @@ -162,7 +202,7 @@ mutex_seq_load(mutex_seq *seqaddr, mutex_seq *oldseqval) #define mutex_seq_atomic_load(seqaddr, oldseqval, m) \ mutex_seq_atomic_load_##m(seqaddr, oldseqval) -PTHREAD_ALWAYS_INLINE +PTHREAD_ALWAYS_INLINE PTHREAD_USED static inline bool mutex_seq_atomic_cmpxchgv_relaxed(mutex_seq *seqaddr, mutex_seq *oldseqval, mutex_seq *newseqval) @@ -171,7 +211,7 @@ mutex_seq_atomic_cmpxchgv_relaxed(mutex_seq *seqaddr, mutex_seq *oldseqval, newseqval->seq_LU, &oldseqval->seq_LU, relaxed); } -PTHREAD_ALWAYS_INLINE +PTHREAD_ALWAYS_INLINE PTHREAD_USED static inline bool mutex_seq_atomic_cmpxchgv_acquire(mutex_seq *seqaddr, mutex_seq *oldseqval, mutex_seq *newseqval) @@ -180,7 +220,7 @@ mutex_seq_atomic_cmpxchgv_acquire(mutex_seq *seqaddr, mutex_seq *oldseqval, newseqval->seq_LU, &oldseqval->seq_LU, acquire); } -PTHREAD_ALWAYS_INLINE +PTHREAD_ALWAYS_INLINE PTHREAD_USED static inline bool mutex_seq_atomic_cmpxchgv_release(mutex_seq *seqaddr, mutex_seq *oldseqval, mutex_seq *newseqval) @@ -274,8 +314,16 @@ pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t *attr, int *policy) { int res = EINVAL; if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) { - *policy = attr->policy; - res = 0; + switch (attr->opt) { + case 
_PTHREAD_MTX_OPT_POLICY_FAIRSHARE: + *policy = PTHREAD_MUTEX_POLICY_FAIRSHARE_NP; + res = 0; + break; + case _PTHREAD_MTX_OPT_POLICY_FIRSTFIT: + *policy = PTHREAD_MUTEX_POLICY_FIRSTFIT_NP; + res = 0; + break; + } } return res; } @@ -307,7 +355,7 @@ pthread_mutexattr_init(pthread_mutexattr_t *attr) { attr->prioceiling = _PTHREAD_DEFAULT_PRIOCEILING; attr->protocol = _PTHREAD_DEFAULT_PROTOCOL; - attr->policy = __pthread_mutex_default_policy; + attr->opt = __pthread_mutex_default_opt_policy; attr->type = PTHREAD_MUTEX_DEFAULT; attr->sig = _PTHREAD_MUTEX_ATTR_SIG; attr->pshared = _PTHREAD_DEFAULT_PSHARED; @@ -349,12 +397,18 @@ pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *attr, int policy) { int res = EINVAL; if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) { + // the first-fit implementation was broken + // pre-Liberty so this mapping exists to ensure that the old first-fit + // define (2) is no longer valid when used on older systems. switch (policy) { - case _PTHREAD_MUTEX_POLICY_FAIRSHARE: - case _PTHREAD_MUTEX_POLICY_FIRSTFIT: - attr->policy = policy; - res = 0; - break; + case PTHREAD_MUTEX_POLICY_FAIRSHARE_NP: + attr->opt = _PTHREAD_MTX_OPT_POLICY_FAIRSHARE; + res = 0; + break; + case PTHREAD_MUTEX_POLICY_FIRSTFIT_NP: + attr->opt = _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; + res = 0; + break; } } return res; @@ -412,6 +466,115 @@ _pthread_mutex_corruption_abort(_pthread_mutex *mutex) } +PTHREAD_NOINLINE +static int +_pthread_mutex_check_init_slow(_pthread_mutex *mutex) +{ + int res = EINVAL; + + if (_pthread_mutex_check_signature_init(mutex)) { + _PTHREAD_LOCK(mutex->lock); + if (_pthread_mutex_check_signature_init(mutex)) { + // initialize a statically initialized mutex to provide + // compatibility for misbehaving applications. + // (unlock should not be the first operation on a mutex) + res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf)); + } else if (_pthread_mutex_check_signature(mutex)) { + res = 0; + } + _PTHREAD_UNLOCK(mutex->lock); + } else if (_pthread_mutex_check_signature(mutex)) { + res = 0; + } + if (res != 0) { + PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res); + } + return res; +} + +PTHREAD_ALWAYS_INLINE +static inline int +_pthread_mutex_check_init(_pthread_mutex *mutex) +{ + int res = 0; + if (!_pthread_mutex_check_signature(mutex)) { + return _pthread_mutex_check_init_slow(mutex); + } + return res; +} + +PTHREAD_ALWAYS_INLINE +static inline bool +_pthread_mutex_is_fairshare(_pthread_mutex *mutex) +{ + return (mutex->mtxopts.options.policy == _PTHREAD_MTX_OPT_POLICY_FAIRSHARE); +} + +PTHREAD_ALWAYS_INLINE +static inline bool +_pthread_mutex_is_firstfit(_pthread_mutex *mutex) +{ + return (mutex->mtxopts.options.policy == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT); +} + +PTHREAD_ALWAYS_INLINE +static inline bool +_pthread_mutex_is_recursive(_pthread_mutex *mutex) +{ + return (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE); +} + +PTHREAD_ALWAYS_INLINE +static int +_pthread_mutex_lock_handle_options(_pthread_mutex *mutex, bool trylock, + uint64_t *tidaddr) +{ + if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL) { + // NORMAL does not do EDEADLK checking + return 0; + } + + uint64_t selfid = _pthread_selfid_direct(); + if (os_atomic_load(tidaddr, relaxed) == selfid) { + if (_pthread_mutex_is_recursive(mutex)) { + if (mutex->mtxopts.options.lock_count < USHRT_MAX) { + mutex->mtxopts.options.lock_count += 1; + return mutex->mtxopts.options.lock_count; + } else { + return -EAGAIN; + } + } else if (trylock) { /* PTHREAD_MUTEX_ERRORCHECK */ + // as per OpenGroup, 
trylock cannot + // return EDEADLK on a deadlock, it should return EBUSY. + return -EBUSY; + } else { /* PTHREAD_MUTEX_ERRORCHECK */ + return -EDEADLK; + } + } + + // Not recursive, or recursive but first lock. + return 0; +} + +PTHREAD_ALWAYS_INLINE +static int +_pthread_mutex_unlock_handle_options(_pthread_mutex *mutex, uint64_t *tidaddr) +{ + if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL) { + // NORMAL does not do EDEADLK checking + return 0; + } + + uint64_t selfid = _pthread_selfid_direct(); + if (os_atomic_load(tidaddr, relaxed) != selfid) { + return -EPERM; + } else if (_pthread_mutex_is_recursive(mutex) && + --mutex->mtxopts.options.lock_count) { + return 1; + } + return 0; +} + /* * Sequence numbers and TID: * @@ -444,11 +607,9 @@ _pthread_mutex_corruption_abort(_pthread_mutex *mutex) */ PTHREAD_ALWAYS_INLINE static inline int -_pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, - uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp) +_pthread_mutex_fairshare_unlock_updatebits(_pthread_mutex *mutex, + uint32_t *flagsp, uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp) { - bool firstfit = (mutex->mtxopts.options.policy == - _PTHREAD_MUTEX_POLICY_FIRSTFIT); uint32_t flags = mutex->mtxopts.value; flags &= ~_PTHREAD_MTX_OPT_NOTIFY; // no notification by default @@ -462,27 +623,24 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, MUTEX_GETTID_ADDR(mutex, &tidaddr); uint64_t oldtid, newtid; - if (mutex->mtxopts.options.type != PTHREAD_MUTEX_NORMAL) { - uint64_t selfid = _pthread_selfid_direct(); - if (os_atomic_load(tidaddr, relaxed) != selfid) { - PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, EPERM); - return EPERM; - } else if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE && - --mutex->mtxopts.options.lock_count) { - PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1); - if (flagsp != NULL) { - *flagsp = flags; - } - return 0; + int res = _pthread_mutex_unlock_handle_options(mutex, tidaddr); + if (res > 0) { + // Valid recursive unlock + if (flagsp) { + *flagsp = flags; } + PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1); + return 0; + } else if (res < 0) { + PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, -res); + return -res; } - bool clearprepost, clearnotify, spurious; + bool clearnotify, spurious; do { newseq = oldseq; oldtid = os_atomic_load(tidaddr, relaxed); - clearprepost = false; clearnotify = false; spurious = false; @@ -504,13 +662,7 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, clearnotify = true; newtid = 0; // clear owner } else { - if (firstfit) { - // reset E bit so another can acquire meanwhile - newseq.lgenval &= ~PTH_RWL_EBIT; - newtid = 0; - } else { - newtid = PTHREAD_MTX_TID_SWITCHING; - } + newtid = PTHREAD_MTX_TID_SWITCHING; // need to signal others waiting for mutex flags |= _PTHREAD_MTX_OPT_NOTIFY; } @@ -530,21 +682,12 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, if (clearnotify || spurious) { flags &= ~_PTHREAD_MTX_OPT_NOTIFY; - if (firstfit && (newseq.lgenval & PTH_RWL_PBIT)) { - clearprepost = true; - newseq.lgenval &= ~PTH_RWL_PBIT; - } } } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release)); PTHREAD_TRACE(psynch_mutex_unlock_updatebits, mutex, oldseq.lgenval, newseq.lgenval, oldtid); - if (clearprepost) { - __psynch_cvclrprepost(mutex, newseq.lgenval, newseq.ugenval, 0, 0, - newseq.lgenval, flags | _PTHREAD_MTX_OPT_MUTEX); - } - if (mgenp != NULL) { *mgenp = newseq.lgenval; } @@ -561,20 +704,11 @@ 
_pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, return 0; } -PTHREAD_NOEXPORT PTHREAD_NOINLINE -int -_pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t *flagsp, - uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp) -{ - return _pthread_mutex_unlock_updatebits(mutex, flagsp, pmtxp, mgenp, ugenp); -} - PTHREAD_ALWAYS_INLINE static inline int -_pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid) +_pthread_mutex_fairshare_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid) { - bool firstfit = (mutex->mtxopts.options.policy == - _PTHREAD_MUTEX_POLICY_FIRSTFIT); + bool firstfit = _pthread_mutex_is_firstfit(mutex); bool gotlock = true; mutex_seq *seqaddr; @@ -585,11 +719,9 @@ _pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid) uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); - uint64_t oldtid; do { newseq = oldseq; - oldtid = os_atomic_load(tidaddr, relaxed); if (firstfit) { // firstfit locks can have the lock stolen out from under a locker @@ -605,17 +737,14 @@ _pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid) newseq.lgenval |= PTH_RWL_KBIT | PTH_RWL_EBIT; } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, - relaxed)); + acquire)); if (gotlock) { - if (!os_atomic_cmpxchg(tidaddr, oldtid, selfid, relaxed)) { - // we own this mutex, nobody should be updating it except us - return _pthread_mutex_corruption_abort(mutex); - } + os_atomic_store(tidaddr, selfid, relaxed); } PTHREAD_TRACE(psynch_mutex_lock_updatebits, mutex, oldseq.lgenval, - newseq.lgenval, oldtid); + newseq.lgenval, 0); // failing to take the lock in firstfit returns 1 to force the caller // to wait in the kernel @@ -624,114 +753,36 @@ _pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid) PTHREAD_NOINLINE static int -_pthread_mutex_markprepost(_pthread_mutex *mutex, uint32_t updateval) -{ - mutex_seq *seqaddr; - MUTEX_GETSEQ_ADDR(mutex, &seqaddr); - - mutex_seq oldseq, newseq; - mutex_seq_load(seqaddr, &oldseq); - - bool clearprepost; - do { - clearprepost = false; - newseq = oldseq; - - /* update the bits */ - if ((oldseq.lgenval & PTHRW_COUNT_MASK) == - (oldseq.ugenval & PTHRW_COUNT_MASK)) { - clearprepost = true; - newseq.lgenval &= ~PTH_RWL_PBIT; - } else { - newseq.lgenval |= PTH_RWL_PBIT; - } - } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, relaxed)); - - if (clearprepost) { - __psynch_cvclrprepost(mutex, newseq.lgenval, newseq.ugenval, 0, 0, - newseq.lgenval, mutex->mtxopts.value | _PTHREAD_MTX_OPT_MUTEX); - } - - return 0; -} - -PTHREAD_NOINLINE -static int -_pthread_mutex_check_init_slow(pthread_mutex_t *omutex) -{ - int res = EINVAL; - _pthread_mutex *mutex = (_pthread_mutex *)omutex; - - if (_pthread_mutex_check_signature_init(mutex)) { - _PTHREAD_LOCK(mutex->lock); - if (_pthread_mutex_check_signature_init(mutex)) { - // initialize a statically initialized mutex to provide - // compatibility for misbehaving applications. 
- // (unlock should not be the first operation on a mutex) - res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf)); - } else if (_pthread_mutex_check_signature(mutex)) { - res = 0; - } - _PTHREAD_UNLOCK(mutex->lock); - } else if (_pthread_mutex_check_signature(mutex)) { - res = 0; - } - if (res != 0) { - PLOCKSTAT_MUTEX_ERROR(omutex, res); - } - return res; -} - -PTHREAD_ALWAYS_INLINE -static inline int -_pthread_mutex_check_init(pthread_mutex_t *omutex) -{ - int res = 0; - _pthread_mutex *mutex = (_pthread_mutex *)omutex; - - if (!_pthread_mutex_check_signature(mutex)) { - return _pthread_mutex_check_init_slow(omutex); - } - return res; -} - -PTHREAD_NOINLINE -static int -_pthread_mutex_lock_wait(pthread_mutex_t *omutex, mutex_seq newseq, +_pthread_mutex_fairshare_lock_wait(_pthread_mutex *mutex, mutex_seq newseq, uint64_t oldtid) { - _pthread_mutex *mutex = (_pthread_mutex *)omutex; - uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); uint64_t selfid = _pthread_selfid_direct(); - PLOCKSTAT_MUTEX_BLOCK(omutex); + PLOCKSTAT_MUTEX_BLOCK((pthread_mutex_t *)mutex); do { uint32_t updateval; do { - updateval = __psynch_mutexwait(omutex, newseq.lgenval, + updateval = __psynch_mutexwait(mutex, newseq.lgenval, newseq.ugenval, oldtid, mutex->mtxopts.value); oldtid = os_atomic_load(tidaddr, relaxed); } while (updateval == (uint32_t)-1); // returns 0 on succesful update; in firstfit it may fail with 1 - } while (_pthread_mutex_lock_updatebits(mutex, selfid) == 1); - PLOCKSTAT_MUTEX_BLOCKED(omutex, BLOCK_SUCCESS_PLOCKSTAT); + } while (_pthread_mutex_fairshare_lock_updatebits(mutex, selfid) == 1); + PLOCKSTAT_MUTEX_BLOCKED((pthread_mutex_t *)mutex, BLOCK_SUCCESS_PLOCKSTAT); return 0; } PTHREAD_NOEXPORT PTHREAD_NOINLINE int -_pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock) +_pthread_mutex_fairshare_lock_slow(_pthread_mutex *omutex, bool trylock) { int res, recursive = 0; _pthread_mutex *mutex = (_pthread_mutex *)omutex; - res = _pthread_mutex_check_init(omutex); - if (res != 0) return res; - mutex_seq *seqaddr; MUTEX_GETSEQ_ADDR(mutex, &seqaddr); @@ -742,25 +793,14 @@ _pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock) MUTEX_GETTID_ADDR(mutex, &tidaddr); uint64_t oldtid, selfid = _pthread_selfid_direct(); - if (mutex->mtxopts.options.type != PTHREAD_MUTEX_NORMAL) { - if (os_atomic_load(tidaddr, relaxed) == selfid) { - if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) { - if (mutex->mtxopts.options.lock_count < USHRT_MAX) { - mutex->mtxopts.options.lock_count++; - recursive = 1; - res = 0; - } else { - res = EAGAIN; - } - } else if (trylock) { /* PTHREAD_MUTEX_ERRORCHECK */ - // as per OpenGroup, trylock cannot - // return EDEADLK on a deadlock, it should return EBUSY. 
- res = EBUSY; - } else { /* PTHREAD_MUTEX_ERRORCHECK */ - res = EDEADLK; - } - goto out; - } + res = _pthread_mutex_lock_handle_options(mutex, trylock, tidaddr); + if (res > 0) { + recursive = 1; + res = 0; + goto out; + } else if (res < 0) { + res = -res; + goto out; } bool gotlock; @@ -797,44 +837,39 @@ _pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock) } else { PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_START, omutex, newseq.lgenval, newseq.ugenval, oldtid); - res = _pthread_mutex_lock_wait(omutex, newseq, oldtid); + res = _pthread_mutex_fairshare_lock_wait(mutex, newseq, oldtid); PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_END, omutex, newseq.lgenval, newseq.ugenval, oldtid); } - if (res == 0 && mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) { + if (res == 0 && _pthread_mutex_is_recursive(mutex)) { mutex->mtxopts.options.lock_count = 1; } out: #if PLOCKSTAT if (res == 0) { - PLOCKSTAT_MUTEX_ACQUIRE(omutex, recursive, 0); + PLOCKSTAT_MUTEX_ACQUIRE((pthread_mutex_t *)mutex, recursive, 0); } else { - PLOCKSTAT_MUTEX_ERROR(omutex, res); + PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res); } #endif return res; } -PTHREAD_ALWAYS_INLINE +PTHREAD_NOINLINE static inline int -_pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) +_pthread_mutex_fairshare_lock(_pthread_mutex *mutex, bool trylock) { #if ENABLE_USERSPACE_TRACE - return _pthread_mutex_lock_slow(omutex, trylock); + return _pthread_mutex_fairshare_lock_slow(mutex, trylock); #elif PLOCKSTAT if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) { - return _pthread_mutex_lock_slow(omutex, trylock); + return _pthread_mutex_fairshare_lock_slow(mutex, trylock); } #endif - _pthread_mutex *mutex = (_pthread_mutex *)omutex; - if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) { - return _pthread_mutex_lock_slow(omutex, trylock); - } - uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); uint64_t selfid = _pthread_selfid_direct(); @@ -846,7 +881,7 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) mutex_seq_load(seqaddr, &oldseq); if (os_unlikely(oldseq.lgenval & PTH_RWL_EBIT)) { - return _pthread_mutex_lock_slow(omutex, trylock); + return _pthread_mutex_fairshare_lock_slow(mutex, trylock); } bool gotlock; @@ -865,7 +900,7 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) newseq.lgenval += PTHRW_INC; newseq.lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT; } else { - return _pthread_mutex_lock_slow(omutex, trylock); + return _pthread_mutex_fairshare_lock_slow(mutex, trylock); } } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire))); @@ -880,45 +915,24 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) } } -PTHREAD_NOEXPORT_VARIANT -int -pthread_mutex_lock(pthread_mutex_t *mutex) -{ - return _pthread_mutex_lock(mutex, false); -} - -PTHREAD_NOEXPORT_VARIANT -int -pthread_mutex_trylock(pthread_mutex_t *mutex) -{ - return _pthread_mutex_lock(mutex, true); -} - -/* - * Unlock a mutex. 
- * TODO: Priority inheritance stuff - */ - PTHREAD_NOINLINE static int -_pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq, +_pthread_mutex_fairshare_unlock_drop(_pthread_mutex *mutex, mutex_seq newseq, uint32_t flags) { int res; - _pthread_mutex *mutex = (_pthread_mutex *)omutex; - uint32_t updateval; uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); - PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_START, omutex, newseq.lgenval, + PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_START, mutex, newseq.lgenval, newseq.ugenval, os_atomic_load(tidaddr, relaxed)); - updateval = __psynch_mutexdrop(omutex, newseq.lgenval, newseq.ugenval, + updateval = __psynch_mutexdrop(mutex, newseq.lgenval, newseq.ugenval, os_atomic_load(tidaddr, relaxed), flags); - PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_END, omutex, updateval, 0, 0); + PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_END, mutex, updateval, 0, 0); if (updateval == (uint32_t)-1) { res = errno; @@ -930,9 +944,6 @@ _pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq, PTHREAD_ABORT("__psynch_mutexdrop failed with error %d", res); } return res; - } else if ((mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT) - && (updateval & PTH_RWL_PBIT)) { - return _pthread_mutex_markprepost(mutex, updateval); } return 0; @@ -940,49 +951,39 @@ _pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq, PTHREAD_NOEXPORT PTHREAD_NOINLINE int -_pthread_mutex_unlock_slow(pthread_mutex_t *omutex) +_pthread_mutex_fairshare_unlock_slow(_pthread_mutex *mutex) { int res; - _pthread_mutex *mutex = (_pthread_mutex *)omutex; mutex_seq newseq; uint32_t flags; - // Initialize static mutexes for compatibility with misbehaving - // applications (unlock should not be the first operation on a mutex). 
- res = _pthread_mutex_check_init(omutex); - if (res != 0) return res; - - res = _pthread_mutex_unlock_updatebits(mutex, &flags, NULL, &newseq.lgenval, - &newseq.ugenval); + res = _pthread_mutex_fairshare_unlock_updatebits(mutex, &flags, NULL, + &newseq.lgenval, &newseq.ugenval); if (res != 0) return res; if ((flags & _PTHREAD_MTX_OPT_NOTIFY) != 0) { - return _pthread_mutex_unlock_drop(omutex, newseq, flags); + return _pthread_mutex_fairshare_unlock_drop(mutex, newseq, flags); } else { uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); - PTHREAD_TRACE(psynch_mutex_uunlock, omutex, newseq.lgenval, + PTHREAD_TRACE(psynch_mutex_uunlock, mutex, newseq.lgenval, newseq.ugenval, os_atomic_load(tidaddr, relaxed)); } return 0; } -PTHREAD_NOEXPORT_VARIANT -int -pthread_mutex_unlock(pthread_mutex_t *omutex) +PTHREAD_NOINLINE +static int +_pthread_mutex_fairshare_unlock(_pthread_mutex *mutex) { #if ENABLE_USERSPACE_TRACE - return _pthread_mutex_unlock_slow(omutex); + return _pthread_mutex_fairshare_unlock_slow(mutex); #elif PLOCKSTAT if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) { - return _pthread_mutex_unlock_slow(omutex); + return _pthread_mutex_fairshare_unlock_slow(mutex); } #endif - _pthread_mutex *mutex = (_pthread_mutex *)omutex; - if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) { - return _pthread_mutex_unlock_slow(omutex); - } uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); @@ -1012,13 +1013,15 @@ pthread_mutex_unlock(pthread_mutex_t *omutex) if (os_likely((oldseq.lgenval & PTHRW_COUNT_MASK) == (newseq.ugenval & PTHRW_COUNT_MASK))) { - // our unlock sequence matches to lock sequence, so if the - // CAS is successful, the mutex is unlocked + // if we succeed in performing the CAS we can be sure of a fast + // path (only needing the CAS) unlock, if: + // a. our lock and unlock sequence are equal + // b. we don't need to clear an unlock prepost from the kernel // do not reset Ibit, just K&E newseq.lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT); } else { - return _pthread_mutex_unlock_slow(omutex); + return _pthread_mutex_fairshare_unlock_slow(mutex); } } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release))); @@ -1026,6 +1029,468 @@ pthread_mutex_unlock(pthread_mutex_t *omutex) return 0; } +#pragma mark firstfit + +PTHREAD_ALWAYS_INLINE +static inline int +_pthread_mutex_firstfit_unlock_updatebits(_pthread_mutex *mutex, + uint32_t *flagsp, uint32_t **mutexp, uint32_t *lvalp, uint32_t *uvalp) +{ + uint32_t flags = mutex->mtxopts.value & ~_PTHREAD_MTX_OPT_NOTIFY; + bool kernel_wake; + + mutex_seq *seqaddr; + MUTEX_GETSEQ_ADDR(mutex, &seqaddr); + + mutex_seq oldseq, newseq; + mutex_seq_load(seqaddr, &oldseq); + + uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + uint64_t oldtid; + + int res = _pthread_mutex_unlock_handle_options(mutex, tidaddr); + if (res > 0) { + // Valid recursive unlock + if (flagsp) { + *flagsp = flags; + } + PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1); + return 0; + } else if (res < 0) { + PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, -res); + return -res; + } + + do { + newseq = oldseq; + oldtid = os_atomic_load(tidaddr, relaxed); + // More than one kernel waiter means we need to do a wake. + kernel_wake = diff_genseq(oldseq.lgenval, oldseq.ugenval) > 0; + newseq.lgenval &= ~PTH_RWL_EBIT; + + if (kernel_wake) { + // Going to the kernel post-unlock removes a single waiter unlock + // from the mutex counts. 
+ newseq.ugenval += PTHRW_INC; + } + + if (oldtid != 0) { + if (!os_atomic_cmpxchg(tidaddr, oldtid, 0, relaxed)) { + return _pthread_mutex_corruption_abort(mutex); + } + } + } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release)); + + PTHREAD_TRACE(psynch_ffmutex_unlock_updatebits, mutex, oldseq.lgenval, + newseq.lgenval, newseq.ugenval); + + if (kernel_wake) { + // We choose to return this out via flags because the condition + // variable also uses this to determine whether to do a kernel wake + // when beginning a cvwait. + flags |= _PTHREAD_MTX_OPT_NOTIFY; + } + if (lvalp) { + *lvalp = newseq.lgenval; + } + if (uvalp) { + *uvalp = newseq.ugenval; + } + if (mutexp) { + *mutexp = (uint32_t *)mutex; + } + if (flagsp) { + *flagsp = flags; + } + return 0; +} + +PTHREAD_NOEXPORT PTHREAD_NOINLINE +static int +_pthread_mutex_firstfit_wake(_pthread_mutex *mutex, mutex_seq newseq, + uint32_t flags) +{ + PTHREAD_TRACE(psynch_ffmutex_wake, mutex, newseq.lgenval, newseq.ugenval, + 0); + int res = __psynch_mutexdrop(mutex, newseq.lgenval, newseq.ugenval, 0, + flags); + + if (res == -1) { + res = errno; + if (res == EINTR) { + res = 0; + } + if (res != 0) { + PTHREAD_ABORT("__psynch_mutexdrop failed with error %d", res); + } + return res; + } + return 0; +} + +PTHREAD_NOEXPORT PTHREAD_NOINLINE +int +_pthread_mutex_firstfit_unlock_slow(_pthread_mutex *mutex) +{ + mutex_seq newseq; + uint32_t flags; + int res; + + res = _pthread_mutex_firstfit_unlock_updatebits(mutex, &flags, NULL, + &newseq.lgenval, &newseq.ugenval); + if (res != 0) return res; + + if (flags & _PTHREAD_MTX_OPT_NOTIFY) { + return _pthread_mutex_firstfit_wake(mutex, newseq, flags); + } + return 0; +} + +PTHREAD_ALWAYS_INLINE +static bool +_pthread_mutex_firstfit_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid, + mutex_seq *newseqp) +{ + bool gotlock; + + mutex_seq *seqaddr; + MUTEX_GETSEQ_ADDR(mutex, &seqaddr); + + mutex_seq oldseq, newseq; + mutex_seq_load(seqaddr, &oldseq); + + uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + + PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_START, mutex, + oldseq.lgenval, oldseq.ugenval, 0); + + do { + newseq = oldseq; + gotlock = is_rwl_ebit_clear(oldseq.lgenval); + + if (gotlock) { + // If we see the E-bit cleared, we should just attempt to take it. + newseq.lgenval |= PTH_RWL_EBIT; + } else { + // If we failed to get the lock then we need to put ourselves back + // in the queue of waiters. The previous unlocker that woke us out + // of the kernel consumed the S-count for our previous wake. So + // take another ticket on L and go back in the kernel to sleep. 
+ newseq.lgenval += PTHRW_INC; + } + } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire)); + + if (gotlock) { + os_atomic_store(tidaddr, selfid, relaxed); + } + + PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_END, mutex, + newseq.lgenval, newseq.ugenval, 0); + + if (newseqp) { + *newseqp = newseq; + } + return gotlock; +} + +PTHREAD_NOINLINE +static int +_pthread_mutex_firstfit_lock_wait(_pthread_mutex *mutex, mutex_seq newseq, + uint64_t oldtid) +{ + uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + uint64_t selfid = _pthread_selfid_direct(); + + PLOCKSTAT_MUTEX_BLOCK((pthread_mutex_t *)mutex); + do { + uint32_t uval; + do { + PTHREAD_TRACE(psynch_ffmutex_wait | DBG_FUNC_START, mutex, + newseq.lgenval, newseq.ugenval, mutex->mtxopts.value); + uval = __psynch_mutexwait(mutex, newseq.lgenval, newseq.ugenval, + oldtid, mutex->mtxopts.value); + PTHREAD_TRACE(psynch_ffmutex_wait | DBG_FUNC_END, mutex, + uval, 0, 0); + oldtid = os_atomic_load(tidaddr, relaxed); + } while (uval == (uint32_t)-1); + } while (!_pthread_mutex_firstfit_lock_updatebits(mutex, selfid, &newseq)); + PLOCKSTAT_MUTEX_BLOCKED((pthread_mutex_t *)mutex, BLOCK_SUCCESS_PLOCKSTAT); + + return 0; +} + +PTHREAD_NOEXPORT PTHREAD_NOINLINE +int +_pthread_mutex_firstfit_lock_slow(_pthread_mutex *mutex, bool trylock) +{ + int res, recursive = 0; + + mutex_seq *seqaddr; + MUTEX_GETSEQ_ADDR(mutex, &seqaddr); + + mutex_seq oldseq, newseq; + mutex_seq_load(seqaddr, &oldseq); + + uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + uint64_t oldtid, selfid = _pthread_selfid_direct(); + + res = _pthread_mutex_lock_handle_options(mutex, trylock, tidaddr); + if (res > 0) { + recursive = 1; + res = 0; + goto out; + } else if (res < 0) { + res = -res; + goto out; + } + + PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_START, mutex, + oldseq.lgenval, oldseq.ugenval, 0); + + bool gotlock; + do { + newseq = oldseq; + oldtid = os_atomic_load(tidaddr, relaxed); + + gotlock = is_rwl_ebit_clear(oldseq.lgenval); + if (trylock && !gotlock) { + // We still want to perform the CAS here, even though it won't + // do anything so that it fails if someone unlocked while we were + // in the loop + } else if (gotlock) { + // In first-fit, getting the lock simply adds the E-bit + newseq.lgenval |= PTH_RWL_EBIT; + } else { + // Failed to get the lock, increment the L-val and go to + // the kernel to sleep + newseq.lgenval += PTHRW_INC; + } + } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire)); + + PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_END, mutex, + newseq.lgenval, newseq.ugenval, 0); + + if (gotlock) { + os_atomic_store(tidaddr, selfid, relaxed); + res = 0; + PTHREAD_TRACE(psynch_mutex_ulock, mutex, newseq.lgenval, + newseq.ugenval, selfid); + } else if (trylock) { + res = EBUSY; + PTHREAD_TRACE(psynch_mutex_utrylock_failed, mutex, newseq.lgenval, + newseq.ugenval, oldtid); + } else { + PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_START, mutex, + newseq.lgenval, newseq.ugenval, oldtid); + res = _pthread_mutex_firstfit_lock_wait(mutex, newseq, oldtid); + PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_END, mutex, + newseq.lgenval, newseq.ugenval, oldtid); + } + + if (res == 0 && _pthread_mutex_is_recursive(mutex)) { + mutex->mtxopts.options.lock_count = 1; + } + +out: +#if PLOCKSTAT + if (res == 0) { + PLOCKSTAT_MUTEX_ACQUIRE((pthread_mutex_t *)mutex, recursive, 0); + } else { + PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res); + } +#endif + return res; +} + +#pragma mark fast 
path + +PTHREAD_NOEXPORT PTHREAD_NOINLINE +int +_pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t *flagsp, + uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp) +{ + if (_pthread_mutex_is_fairshare(mutex)) { + return _pthread_mutex_fairshare_unlock_updatebits(mutex, flagsp, + pmtxp, mgenp, ugenp); + } + return _pthread_mutex_firstfit_unlock_updatebits(mutex, flagsp, pmtxp, + mgenp, ugenp); +} + +PTHREAD_NOEXPORT PTHREAD_NOINLINE +int +_pthread_mutex_lock_init_slow(_pthread_mutex *mutex, bool trylock) +{ + int res; + + res = _pthread_mutex_check_init(mutex); + if (res != 0) return res; + + if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) { + return _pthread_mutex_fairshare_lock_slow(mutex, trylock); + } + return _pthread_mutex_firstfit_lock_slow(mutex, trylock); +} + +PTHREAD_NOEXPORT PTHREAD_NOINLINE +static int +_pthread_mutex_unlock_init_slow(_pthread_mutex *mutex) +{ + int res; + + // Initialize static mutexes for compatibility with misbehaving + // applications (unlock should not be the first operation on a mutex). + res = _pthread_mutex_check_init(mutex); + if (res != 0) return res; + + if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) { + return _pthread_mutex_fairshare_unlock_slow(mutex); + } + return _pthread_mutex_firstfit_unlock_slow(mutex); +} + +PTHREAD_NOEXPORT_VARIANT +int +pthread_mutex_unlock(pthread_mutex_t *omutex) +{ + _pthread_mutex *mutex = (_pthread_mutex *)omutex; + if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) { + return _pthread_mutex_unlock_init_slow(mutex); + } + + if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) { + return _pthread_mutex_fairshare_unlock(mutex); + } + +#if ENABLE_USERSPACE_TRACE + return _pthread_mutex_firstfit_unlock_slow(mutex); +#elif PLOCKSTAT + if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) { + return _pthread_mutex_firstfit_unlock_slow(mutex); + } +#endif + + /* + * This is the first-fit fast path. The fairshare fast-ish path is in + * _pthread_mutex_firstfit_unlock() + */ + uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + + mutex_seq *seqaddr; + MUTEX_GETSEQ_ADDR(mutex, &seqaddr); + + mutex_seq oldseq, newseq; + mutex_seq_load(seqaddr, &oldseq); + + // We're giving up the mutex one way or the other, so go ahead and + // update the owner to 0 so that once the CAS below succeeds, there + // is no stale ownership information. If the CAS of the seqaddr + // fails, we may loop, but it's still valid for the owner to be + // SWITCHING/0 + os_atomic_store(tidaddr, 0, relaxed); + + do { + newseq = oldseq; + + if (diff_genseq(oldseq.lgenval, oldseq.ugenval) == 0) { + // No outstanding waiters in kernel, we can simply drop the E-bit + // and return. 
+ newseq.lgenval &= ~PTH_RWL_EBIT; + } else { + return _pthread_mutex_firstfit_unlock_slow(mutex); + } + } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, + release))); + + return 0; +} + +PTHREAD_ALWAYS_INLINE +static inline int +_pthread_mutex_firstfit_lock(pthread_mutex_t *omutex, bool trylock) +{ + _pthread_mutex *mutex = (_pthread_mutex *)omutex; + if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) { + return _pthread_mutex_lock_init_slow(mutex, trylock); + } + + if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) { + return _pthread_mutex_fairshare_lock(mutex, trylock); + } + +#if ENABLE_USERSPACE_TRACE + return _pthread_mutex_firstfit_lock_slow(mutex, trylock); +#elif PLOCKSTAT + if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) { + return _pthread_mutex_firstfit_lock_slow(mutex, trylock); + } +#endif + + /* + * This is the first-fit fast path. The fairshare fast-ish path is in + * _pthread_mutex_firstfit_lock() + */ + uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + uint64_t selfid = _pthread_selfid_direct(); + + mutex_seq *seqaddr; + MUTEX_GETSEQ_ADDR(mutex, &seqaddr); + + mutex_seq oldseq, newseq; + mutex_seq_load(seqaddr, &oldseq); + + if (os_unlikely(oldseq.lgenval & PTH_RWL_EBIT)) { + return _pthread_mutex_firstfit_lock_slow(mutex, trylock); + } + + bool gotlock; + do { + newseq = oldseq; + gotlock = is_rwl_ebit_clear(oldseq.lgenval); + + if (trylock && !gotlock) { + // A trylock on a held lock will fail immediately. But since + // we did not load the sequence words atomically, perform a + // no-op CAS64 to ensure that nobody has unlocked concurrently. + } else if (os_likely(gotlock)) { + // In first-fit, getting the lock simply adds the E-bit + newseq.lgenval |= PTH_RWL_EBIT; + } else { + return _pthread_mutex_firstfit_lock_slow(mutex, trylock); + } + } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, + acquire))); + + if (os_likely(gotlock)) { + os_atomic_store(tidaddr, selfid, relaxed); + return 0; + } else if (trylock) { + return EBUSY; + } else { + __builtin_trap(); + } +} + +PTHREAD_NOEXPORT_VARIANT +int +pthread_mutex_lock(pthread_mutex_t *mutex) +{ + return _pthread_mutex_firstfit_lock(mutex, false); +} + +PTHREAD_NOEXPORT_VARIANT +int +pthread_mutex_trylock(pthread_mutex_t *mutex) +{ + return _pthread_mutex_firstfit_lock(mutex, true); +} + PTHREAD_ALWAYS_INLINE static inline int @@ -1040,7 +1505,7 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, } mutex->prioceiling = (int16_t)attr->prioceiling; mutex->mtxopts.options.protocol = attr->protocol; - mutex->mtxopts.options.policy = attr->policy; + mutex->mtxopts.options.policy = attr->opt; mutex->mtxopts.options.type = attr->type; mutex->mtxopts.options.pshared = attr->pshared; } else { @@ -1063,9 +1528,9 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, mutex->prioceiling = _PTHREAD_DEFAULT_PRIOCEILING; mutex->mtxopts.options.protocol = _PTHREAD_DEFAULT_PROTOCOL; if (static_type != 3) { - mutex->mtxopts.options.policy = __pthread_mutex_default_policy; + mutex->mtxopts.options.policy = __pthread_mutex_default_opt_policy; } else { - mutex->mtxopts.options.policy = _PTHREAD_MUTEX_POLICY_FIRSTFIT; + mutex->mtxopts.options.policy = _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; } mutex->mtxopts.options.pshared = _PTHREAD_DEFAULT_PSHARED; } @@ -1089,7 +1554,8 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, long sig = _PTHREAD_MUTEX_SIG; if 
(mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL && - mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FAIRSHARE) { + (_pthread_mutex_is_fairshare(mutex) || + _pthread_mutex_is_firstfit(mutex))) { // rdar://18148854 _pthread_mutex_lock & pthread_mutex_unlock fastpath sig = _PTHREAD_MUTEX_SIG_fast; } diff --git a/src/pthread_rwlock.c b/src/pthread_rwlock.c index 85358df..5b0bc9a 100644 --- a/src/pthread_rwlock.c +++ b/src/pthread_rwlock.c @@ -61,8 +61,6 @@ #include // for bzero #endif -extern int __unix_conforming; - #ifdef PLOCKSTAT #include "plockstat.h" #else /* !PLOCKSTAT */ @@ -513,7 +511,7 @@ _pthread_rwlock_updateval(_pthread_rwlock *rwlock, uint32_t updateval) rwlock_seq_load(seqaddr, &oldseq, RWLOCK_SEQ_LS); do { newseq = oldseq; - if (isoverlap || is_rws_setunlockinit(oldseq.rw_seq) != 0) { + if (isoverlap || is_rws_unlockinit_set(oldseq.rw_seq)) { // Set S word to the specified value uint32_t savebits = (oldseq.rw_seq & PTHRW_RWS_SAVEMASK); newseq.lcntval = _pthread_rwlock_modbits(oldseq.lcntval, updateval, @@ -763,7 +761,7 @@ retry: newseq.lcntval |= PTH_RWL_KBIT | PTH_RWL_WBIT; } newseq.lcntval += PTHRW_INC; - if (is_rws_setseq(oldseq.rw_seq)) { + if (is_rws_sbit_set(oldseq.rw_seq)) { // Clear the S bit and set S to L newseq.rw_seq &= (PTHRW_BIT_MASK & ~PTH_RWS_SBIT); newseq.rw_seq |= (oldseq.lcntval & PTHRW_COUNT_MASK); diff --git a/src/pthread_tsd.c b/src/pthread_tsd.c index 3a77266..54b1bb0 100644 --- a/src/pthread_tsd.c +++ b/src/pthread_tsd.c @@ -61,12 +61,13 @@ // __pthread_tsd_end is the end of dynamic keys. static const int __pthread_tsd_first = __TSD_RESERVED_MAX + 1; -static int __pthread_tsd_max = __pthread_tsd_first; static const int __pthread_tsd_start = _INTERNAL_POSIX_THREAD_KEYS_MAX; static const int __pthread_tsd_end = _INTERNAL_POSIX_THREAD_KEYS_END; -static int __pthread_key_legacy_behaviour = 0; -static int __pthread_key_legacy_behaviour_log = 0; +static int __pthread_tsd_max = __pthread_tsd_first; +static _pthread_lock __pthread_tsd_lock = _PTHREAD_LOCK_INITIALIZER; +static bool __pthread_key_legacy_behaviour = 0; +static bool __pthread_key_legacy_behaviour_log = 0; // Omit support for pthread key destructors in the static archive for dyld. // dyld does not create and destroy threads so these are not necessary. @@ -80,15 +81,17 @@ static struct { uintptr_t destructor; } _pthread_keys[_INTERNAL_POSIX_THREAD_KEYS_END]; -static _pthread_lock tsd_lock = _PTHREAD_LOCK_INITIALIZER; - // The pthread_tsd destruction order can be reverted to the old (pre-10.11) order // by setting this environment variable. void _pthread_key_global_init(const char *envp[]) { - __pthread_key_legacy_behaviour = _simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER") ? 1 : 0; - __pthread_key_legacy_behaviour_log = _simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER_LOG") ? 1 : 0; + if (_simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER")) { + __pthread_key_legacy_behaviour = true; + } + if (_simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER_LOG")) { + __pthread_key_legacy_behaviour_log = true; + } } // Returns true if successful, false if destructor was already set. @@ -133,7 +136,7 @@ pthread_key_create(pthread_key_t *key, void (*destructor)(void *)) int res = EAGAIN; // Returns EAGAIN if key cannot be allocated. 
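	// Dynamic keys occupy the slots from __pthread_tsd_start up to
	// __pthread_tsd_end; the loop below just claims, while holding
	// __pthread_tsd_lock, the first slot whose destructor word is not yet
	// set. For reference, typical caller-side usage of these slots is the
	// standard POSIX pattern (nothing here is specific to this
	// implementation):
	//
	//	static pthread_key_t key;
	//	pthread_key_create(&key, free);         // reserve a slot + destructor
	//	pthread_setspecific(key, strdup("x"));  // per-thread value
	//	char *v = pthread_getspecific(key);     // read it back on this thread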
pthread_key_t k; - _PTHREAD_LOCK(tsd_lock); + _PTHREAD_LOCK(__pthread_tsd_lock); for (k = __pthread_tsd_start; k < __pthread_tsd_end; k++) { if (_pthread_key_set_destructor(k, destructor)) { *key = k; @@ -141,7 +144,7 @@ pthread_key_create(pthread_key_t *key, void (*destructor)(void *)) break; } } - _PTHREAD_UNLOCK(tsd_lock); + _PTHREAD_UNLOCK(__pthread_tsd_lock); return res; } @@ -151,12 +154,12 @@ pthread_key_delete(pthread_key_t key) { int res = EINVAL; // Returns EINVAL if key is not allocated. - _PTHREAD_LOCK(tsd_lock); + _PTHREAD_LOCK(__pthread_tsd_lock); if (key >= __pthread_tsd_start && key < __pthread_tsd_end) { if (_pthread_key_unset_destructor(key)) { struct _pthread *p; _PTHREAD_LOCK(_pthread_list_lock); - TAILQ_FOREACH(p, &__pthread_head, plist) { + TAILQ_FOREACH(p, &__pthread_head, tl_plist) { // No lock for word-sized write. p->tsd[key] = 0; } @@ -164,7 +167,7 @@ pthread_key_delete(pthread_key_t key) res = 0; } } - _PTHREAD_UNLOCK(tsd_lock); + _PTHREAD_UNLOCK(__pthread_tsd_lock); return res; } @@ -188,7 +191,7 @@ pthread_setspecific(pthread_key_t key, const void *value) _pthread_key_set_destructor(key, NULL); } if (key > self->max_tsd_key) { - self->max_tsd_key = (int)key; + self->max_tsd_key = (uint16_t)key; } } } @@ -342,12 +345,12 @@ pthread_key_init_np(int key, void (*destructor)(void *)) { int res = EINVAL; // Returns EINVAL if key is out of range. if (key >= __pthread_tsd_first && key < __pthread_tsd_start) { - _PTHREAD_LOCK(tsd_lock); + _PTHREAD_LOCK(__pthread_tsd_lock); _pthread_key_set_destructor(key, destructor); if (key > __pthread_tsd_max) { __pthread_tsd_max = key; } - _PTHREAD_UNLOCK(tsd_lock); + _PTHREAD_UNLOCK(__pthread_tsd_lock); res = 0; } return res; diff --git a/src/qos.c b/src/qos.c index b31098a..ef36089 100644 --- a/src/qos.c +++ b/src/qos.c @@ -35,8 +35,6 @@ #include "workqueue_private.h" #include "qos_private.h" -static pthread_priority_t _main_qos = QOS_CLASS_UNSPECIFIED; - #define PTHREAD_OVERRIDE_SIGNATURE (0x6f766572) #define PTHREAD_OVERRIDE_SIG_DEAD (0x7265766f) @@ -49,172 +47,145 @@ struct pthread_override_s bool malloced; }; -void -_pthread_set_main_qos(pthread_priority_t qos) +thread_qos_t +_pthread_qos_class_to_thread_qos(qos_class_t qos) { - _main_qos = qos; + switch (qos) { + case QOS_CLASS_USER_INTERACTIVE: return THREAD_QOS_USER_INTERACTIVE; + case QOS_CLASS_USER_INITIATED: return THREAD_QOS_USER_INITIATED; + case QOS_CLASS_DEFAULT: return THREAD_QOS_LEGACY; + case QOS_CLASS_UTILITY: return THREAD_QOS_UTILITY; + case QOS_CLASS_BACKGROUND: return THREAD_QOS_BACKGROUND; + case QOS_CLASS_MAINTENANCE: return THREAD_QOS_MAINTENANCE; + default: return THREAD_QOS_UNSPECIFIED; + } } -int -pthread_attr_set_qos_class_np(pthread_attr_t *__attr, - qos_class_t __qos_class, - int __relative_priority) -{ - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return ENOTSUP; - } +static inline qos_class_t +_pthread_qos_class_from_thread_qos(thread_qos_t tqos) +{ + static const qos_class_t thread_qos_to_qos_class[THREAD_QOS_LAST] = { + [THREAD_QOS_UNSPECIFIED] = QOS_CLASS_UNSPECIFIED, + [THREAD_QOS_MAINTENANCE] = QOS_CLASS_MAINTENANCE, + [THREAD_QOS_BACKGROUND] = QOS_CLASS_BACKGROUND, + [THREAD_QOS_UTILITY] = QOS_CLASS_UTILITY, + [THREAD_QOS_LEGACY] = QOS_CLASS_DEFAULT, + [THREAD_QOS_USER_INITIATED] = QOS_CLASS_USER_INITIATED, + [THREAD_QOS_USER_INTERACTIVE] = QOS_CLASS_USER_INTERACTIVE, + }; + if (os_unlikely(tqos >= THREAD_QOS_LAST)) return QOS_CLASS_UNSPECIFIED; + return thread_qos_to_qos_class[tqos]; +} - if (__relative_priority > 
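	// A relative priority is only meaningful in the range
	// QOS_MIN_RELATIVE_PRIORITY..0; anything positive, or below the floor,
	// is rejected. The new _pthread_validate_qos_class_and_relpri helper
	// just below folds that range check into the qos_class_t to
	// thread_qos_t mapping and reports any invalid combination as
	// THREAD_QOS_UNSPECIFIED, so callers can treat "unspecified" and "bad
	// argument" the same way. For example, (QOS_CLASS_UTILITY, -3) maps to
	// THREAD_QOS_UTILITY with the -3 carried in the priority bits, while
	// (QOS_CLASS_UTILITY, +1) comes back as THREAD_QOS_UNSPECIFIED.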
0 || __relative_priority < QOS_MIN_RELATIVE_PRIORITY) { - return EINVAL; +static inline thread_qos_t +_pthread_validate_qos_class_and_relpri(qos_class_t qc, int relpri) +{ + if (relpri > 0 || relpri < QOS_MIN_RELATIVE_PRIORITY) { + return THREAD_QOS_UNSPECIFIED; } + return _pthread_qos_class_to_thread_qos(qc); +} - int ret = EINVAL; - if (__attr->sig == _PTHREAD_ATTR_SIG) { - if (!__attr->schedset) { - __attr->qosclass = _pthread_priority_make_newest(__qos_class, __relative_priority, 0); - __attr->qosset = 1; - ret = 0; - } - } +static inline void +_pthread_priority_split(pthread_priority_t pp, qos_class_t *qc, int *relpri) +{ + thread_qos_t qos = _pthread_priority_thread_qos(pp); + if (qc) *qc = _pthread_qos_class_from_thread_qos(qos); + if (relpri) *relpri = _pthread_priority_relpri(pp); +} - return ret; +void +_pthread_set_main_qos(pthread_priority_t qos) +{ + _main_qos = (uint32_t)qos; } int -pthread_attr_get_qos_class_np(pthread_attr_t * __restrict __attr, - qos_class_t * __restrict __qos_class, - int * __restrict __relative_priority) +pthread_attr_set_qos_class_np(pthread_attr_t *attr, qos_class_t qc, int relpri) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return ENOTSUP; + thread_qos_t qos = _pthread_validate_qos_class_and_relpri(qc, relpri); + if (attr->sig != _PTHREAD_ATTR_SIG || attr->schedset) { + return EINVAL; } - int ret = EINVAL; - if (__attr->sig == _PTHREAD_ATTR_SIG) { - if (__attr->qosset) { - qos_class_t qos; int relpri; - _pthread_priority_split_newest(__attr->qosclass, qos, relpri); + attr->qosclass = _pthread_priority_make_from_thread_qos(qos, relpri, 0); + attr->qosset = 1; + attr->schedset = 0; + return 0; +} - if (__qos_class) { *__qos_class = qos; } - if (__relative_priority) { *__relative_priority = relpri; } - } else { - if (__qos_class) { *__qos_class = 0; } - if (__relative_priority) { *__relative_priority = 0; } - } - ret = 0; +int +pthread_attr_get_qos_class_np(pthread_attr_t *attr, qos_class_t *qc, int *relpri) +{ + if (attr->sig != _PTHREAD_ATTR_SIG) { + return EINVAL; } - return ret; + _pthread_priority_split(attr->qosset ? attr->qosclass : 0, qc, relpri); + return 0; } int -pthread_set_qos_class_self_np(qos_class_t __qos_class, - int __relative_priority) +pthread_set_qos_class_self_np(qos_class_t qc, int relpri) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return ENOTSUP; - } - - if (__relative_priority > 0 || __relative_priority < QOS_MIN_RELATIVE_PRIORITY) { + thread_qos_t qos = _pthread_validate_qos_class_and_relpri(qc, relpri); + if (!qos) { return EINVAL; } - pthread_priority_t priority = _pthread_priority_make_newest(__qos_class, __relative_priority, 0); - - if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) { - return _pthread_set_properties_self(_PTHREAD_SET_SELF_QOS_FLAG, priority, 0); - } else { - /* We set the thread QoS class in the TSD and then call into the kernel to - * read the value out of it and set the QoS class. 
- */ - _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority); - mach_port_t kport = _pthread_kernel_thread(pthread_self()); - int res = __bsdthread_ctl(BSDTHREAD_CTL_SET_QOS, kport, &pthread_self()->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS], 0); - - if (res == -1) { - res = errno; - } - - return res; - } + pthread_priority_t pp = _pthread_priority_make_from_thread_qos(qos, relpri, 0); + return _pthread_set_properties_self(_PTHREAD_SET_SELF_QOS_FLAG, pp, 0); } int -pthread_set_qos_class_np(pthread_t __pthread, - qos_class_t __qos_class, - int __relative_priority) +pthread_set_qos_class_np(pthread_t thread, qos_class_t qc, int relpri) { - if (__pthread != pthread_self()) { + if (thread != pthread_self()) { /* The kext now enforces this anyway, if we check here too, it allows us to call * _pthread_set_properties_self later if we can. */ return EPERM; } - - return pthread_set_qos_class_self_np(__qos_class, __relative_priority); + return pthread_set_qos_class_self_np(qc, relpri); } int -pthread_get_qos_class_np(pthread_t __pthread, - qos_class_t * __restrict __qos_class, - int * __restrict __relative_priority) +pthread_get_qos_class_np(pthread_t thread, qos_class_t *qc, int *relpri) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return ENOTSUP; - } - - pthread_priority_t priority; - - if (__pthread == pthread_self()) { - priority = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS); - } else { - priority = __pthread->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS]; - } - - qos_class_t qos; int relpri; - _pthread_priority_split_newest(priority, qos, relpri); - - if (__qos_class) { *__qos_class = qos; } - if (__relative_priority) { *__relative_priority = relpri; } - + pthread_priority_t pp = thread->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS]; + _pthread_priority_split(pp, qc, relpri); return 0; } qos_class_t qos_class_self(void) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return QOS_CLASS_UNSPECIFIED; - } - - pthread_priority_t p = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS); - qos_class_t c = _pthread_priority_get_qos_newest(p); - - return c; + pthread_priority_t pp; + pp = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS); + return _pthread_qos_class_from_thread_qos(_pthread_priority_thread_qos(pp)); } qos_class_t qos_class_main(void) { - return _pthread_priority_get_qos_newest(_main_qos); + pthread_priority_t pp = _main_qos; + return _pthread_qos_class_from_thread_qos(_pthread_priority_thread_qos(pp)); } pthread_priority_t -_pthread_qos_class_encode(qos_class_t qos_class, int relative_priority, unsigned long flags) +_pthread_qos_class_encode(qos_class_t qc, int relpri, unsigned long flags) { - return _pthread_priority_make_newest(qos_class, relative_priority, flags); + thread_qos_t qos = _pthread_qos_class_to_thread_qos(qc); + return _pthread_priority_make_from_thread_qos(qos, relpri, flags); } qos_class_t -_pthread_qos_class_decode(pthread_priority_t priority, int *relative_priority, unsigned long *flags) +_pthread_qos_class_decode(pthread_priority_t pp, int *relpri, unsigned long *flags) { - qos_class_t qos; int relpri; - - _pthread_priority_split_newest(priority, qos, relpri); - - if (relative_priority) { *relative_priority = relpri; } - if (flags) { *flags = _pthread_priority_get_flags(priority); } - return qos; + qos_class_t qc; + _pthread_priority_split(pp, &qc, relpri); + if (flags) *flags = (pp & _PTHREAD_PRIORITY_FLAGS_MASK); + return qc; } // Encode a legacy 
workqueue API priority into a pthread_priority_t. This API @@ -222,35 +193,48 @@ _pthread_qos_class_decode(pthread_priority_t priority, int *relative_priority, u pthread_priority_t _pthread_qos_class_encode_workqueue(int queue_priority, unsigned long flags) { + thread_qos_t qos; switch (queue_priority) { - case WORKQ_HIGH_PRIOQUEUE: - return _pthread_priority_make_newest(QOS_CLASS_USER_INITIATED, 0, flags); - case WORKQ_DEFAULT_PRIOQUEUE: - return _pthread_priority_make_newest(QOS_CLASS_DEFAULT, 0, flags); - case WORKQ_LOW_PRIOQUEUE: + case WORKQ_HIGH_PRIOQUEUE: qos = THREAD_QOS_USER_INTERACTIVE; break; + case WORKQ_DEFAULT_PRIOQUEUE: qos = THREAD_QOS_LEGACY; break; case WORKQ_NON_INTERACTIVE_PRIOQUEUE: - return _pthread_priority_make_newest(QOS_CLASS_UTILITY, 0, flags); - case WORKQ_BG_PRIOQUEUE: - return _pthread_priority_make_newest(QOS_CLASS_BACKGROUND, 0, flags); - /* Legacy dispatch does not use QOS_CLASS_MAINTENANCE, so no need to handle it here */ + case WORKQ_LOW_PRIOQUEUE: qos = THREAD_QOS_UTILITY; break; + case WORKQ_BG_PRIOQUEUE: qos = THREAD_QOS_BACKGROUND; break; default: __pthread_abort(); } + return _pthread_priority_make_from_thread_qos(qos, 0, flags); } +#define _PTHREAD_SET_SELF_OUTSIDE_QOS_SKIP \ + (_PTHREAD_SET_SELF_QOS_FLAG | _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG | \ + _PTHREAD_SET_SELF_TIMESHARE_FLAG) + int -_pthread_set_properties_self(_pthread_set_flags_t flags, pthread_priority_t priority, mach_port_t voucher) +_pthread_set_properties_self(_pthread_set_flags_t flags, + pthread_priority_t priority, mach_port_t voucher) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_SETSELF)) { - return ENOTSUP; + pthread_t self = pthread_self(); + _pthread_set_flags_t kflags = flags; + int rv = 0; + + if (self->wqoutsideqos && (flags & _PTHREAD_SET_SELF_OUTSIDE_QOS_SKIP)) { + // A number of properties cannot be altered if we are a workloop + // thread that has outside of QoS properties applied to it. + kflags &= ~_PTHREAD_SET_SELF_OUTSIDE_QOS_SKIP; + if (kflags == 0) goto skip; } - int rv = __bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, priority, voucher, flags); + rv = __bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, priority, voucher, kflags); - /* Set QoS TSD if we succeeded or only failed the voucher half. */ +skip: + // Set QoS TSD if we succeeded, or only failed the voucher portion of the + // call. Additionally, if we skipped setting QoS because of outside-of-QoS + // attributes then we still want to set the TSD in userspace. 
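	// Note that when the outside-of-QoS skip above removes every requested
	// flag, the bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, ...) call is bypassed
	// entirely and rv stays 0, so a request that included
	// _PTHREAD_SET_SELF_QOS_FLAG still falls through to the TSD update
	// below; userspace then keeps a consistent view of the thread's QoS
	// even though the kernel-visible scheduling state was left alone.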
if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) { if (rv == 0 || errno == ENOENT) { - _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority); + _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, + priority); } } @@ -263,37 +247,21 @@ _pthread_set_properties_self(_pthread_set_flags_t flags, pthread_priority_t prio int pthread_set_fixedpriority_self(void) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return ENOTSUP; - } - - if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) { - return _pthread_set_properties_self(_PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG, 0, 0); - } else { - return ENOTSUP; - } + return _pthread_set_properties_self(_PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG, 0, 0); } int pthread_set_timeshare_self(void) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return ENOTSUP; - } - - if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) { - return _pthread_set_properties_self(_PTHREAD_SET_SELF_TIMESHARE_FLAG, 0, 0); - } else { - return ENOTSUP; - } + return _pthread_set_properties_self(_PTHREAD_SET_SELF_TIMESHARE_FLAG, 0, 0); } - pthread_override_t -pthread_override_qos_class_start_np(pthread_t __pthread, qos_class_t __qos_class, int __relative_priority) +pthread_override_qos_class_start_np(pthread_t thread, qos_class_t qc, int relpri) { pthread_override_t rv; kern_return_t kr; + thread_qos_t qos; int res = 0; /* For now, we don't have access to malloc. So we'll have to vm_allocate this, which means the tiny struct is going @@ -301,23 +269,30 @@ pthread_override_qos_class_start_np(pthread_t __pthread, qos_class_t __qos_clas */ bool did_malloc = true; + qos = _pthread_validate_qos_class_and_relpri(qc, relpri); + if (qos == THREAD_QOS_UNSPECIFIED) { + return (_Nonnull pthread_override_t)NULL; + } + mach_vm_address_t vm_addr = malloc(sizeof(struct pthread_override_s)); if (!vm_addr) { vm_addr = vm_page_size; did_malloc = false; - kr = mach_vm_allocate(mach_task_self(), &vm_addr, round_page(sizeof(struct pthread_override_s)), VM_MAKE_TAG(VM_MEMORY_LIBDISPATCH) | VM_FLAGS_ANYWHERE); + kr = mach_vm_allocate(mach_task_self(), &vm_addr, + round_page(sizeof(struct pthread_override_s)), + VM_MAKE_TAG(VM_MEMORY_LIBDISPATCH) | VM_FLAGS_ANYWHERE); if (kr != KERN_SUCCESS) { errno = ENOMEM; - return (_Nonnull pthread_override_t) NULL; + return (_Nonnull pthread_override_t)NULL; } } rv = (pthread_override_t)vm_addr; rv->sig = PTHREAD_OVERRIDE_SIGNATURE; - rv->pthread = __pthread; - rv->kthread = pthread_mach_thread_np(__pthread); - rv->priority = _pthread_priority_make_newest(__qos_class, __relative_priority, 0); + rv->pthread = thread; + rv->kthread = pthread_mach_thread_np(thread); + rv->priority = _pthread_priority_make_from_thread_qos(qos, relpri, 0); rv->malloced = did_malloc; /* To ensure that the kernel port that we keep stays valid, we retain it here. 
*/ @@ -342,7 +317,7 @@ pthread_override_qos_class_start_np(pthread_t __pthread, qos_class_t __qos_clas } rv = NULL; } - return (_Nonnull pthread_override_t) rv; + return (_Nonnull pthread_override_t)rv; } int @@ -523,7 +498,11 @@ _pthread_workqueue_parallelism_for_priority(int qos, unsigned long flags) int pthread_qos_max_parallelism(qos_class_t qos, unsigned long flags) { - int thread_qos = _pthread_qos_class_to_thread_qos(qos); + thread_qos_t thread_qos; + if (qos == QOS_CLASS_UNSPECIFIED) { + qos = QOS_CLASS_DEFAULT; // + } + thread_qos = _pthread_qos_class_to_thread_qos(qos); if (thread_qos == THREAD_QOS_UNSPECIFIED) { errno = EINVAL; return -1; diff --git a/src/thread_setup.c b/src/thread_setup.c index 761103e..22cc689 100644 --- a/src/thread_setup.c +++ b/src/thread_setup.c @@ -45,23 +45,24 @@ * MkLinux */ +#include "internal.h" + +#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288 /* * Machine specific support for thread initialization */ -#include "internal.h" -#include +// NOTE: no resolvers, so this file must not contain any atomic operations +PTHREAD_NOEXPORT void _pthread_setup_suspended(pthread_t th, void (*f)(pthread_t), void *sp); /* * Set up the initial state of a MACH thread */ void -_pthread_setup(pthread_t thread, +_pthread_setup_suspended(pthread_t thread, void (*routine)(pthread_t), - void *vsp, - int suspended, - int needresume) + void *vsp) { #if defined(__i386__) i386_thread_state_t state = { }; @@ -71,20 +72,12 @@ _pthread_setup(pthread_t thread, x86_thread_state64_t state = { }; thread_state_flavor_t flavor = x86_THREAD_STATE64; mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT; -#elif defined(__arm__) - arm_thread_state_t state = { }; - thread_state_flavor_t flavor = ARM_THREAD_STATE; - mach_msg_type_number_t count = ARM_THREAD_STATE_COUNT; #else #error _pthread_setup not defined for this architecture #endif - if (suspended) { - (void)thread_get_state(_pthread_kernel_thread(thread), - flavor, - (thread_state_t)&state, - &count); - } + (void)thread_get_state(_pthread_kernel_thread(thread), + flavor, (thread_state_t)&state, &count); #if defined(__i386__) uintptr_t *sp = vsp; @@ -110,46 +103,10 @@ _pthread_setup(pthread_t thread, state.__rdi = (uintptr_t)thread; // argument to function *--sp = 0; // fake return address state.__rsp = (uintptr_t)sp; // set stack pointer -#elif defined(__arm__) - state.__pc = (uintptr_t)routine; - - // Detect switch to thumb mode. - if (state.__pc & 1) { - state.__pc &= ~1; - state.__cpsr |= 0x20; /* PSR_THUMB */ - } - - state.__sp = (uintptr_t)vsp - C_ARGSAVE_LEN - C_RED_ZONE; - state.__r[0] = (uintptr_t)thread; #else -#error _pthread_setup not defined for this architecture +#error _pthread_setup_suspended not defined for this architecture #endif - if (suspended) { - (void)thread_set_state(_pthread_kernel_thread(thread), flavor, (thread_state_t)&state, count); - if (needresume) { - (void)thread_resume(_pthread_kernel_thread(thread)); - } - } else { - mach_port_t kernel_thread; - (void)thread_create_running(mach_task_self(), flavor, (thread_state_t)&state, count, &kernel_thread); - _pthread_set_kernel_thread(thread, kernel_thread); - } -} - -// pthread_setup initializes large structures to 0, which the compiler turns into a library call to memset. 
To avoid linking against -// Libc, provide a simple wrapper that calls through to the libplatform primitives - -#undef memset -__attribute__((visibility("hidden"))) void * -memset(void *b, int c, size_t len) -{ - return _platform_memset(b, c, len); -} - -#undef bzero -__attribute__((visibility("hidden"))) void -bzero(void *s, size_t n) -{ - _platform_bzero(s, n); + (void)thread_set_state(_pthread_kernel_thread(thread), flavor, (thread_state_t)&state, count); } +#endif // !defined(__OPEN_SOURCE__) && TARGET_OS_OSX diff --git a/tests/Makefile b/tests/Makefile index 408b101..84e2717 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -12,10 +12,13 @@ include $(DEVELOPER_DIR)/AppleInternal/Makefiles/darwintest/Makefile.common TARGETS := TARGETS += atfork TARGETS += bsdthread_set_self +TARGETS += stack +TARGETS += stack_size TARGETS += cond #TARGETS += cond_hang3 #TARGETS += cond_stress TARGETS += cond_timed +TARGETS += cond_prepost TARGETS += custom_stack TARGETS += stack_aslr TARGETS += join @@ -24,6 +27,7 @@ TARGETS += main_stack_custom TARGETS += detach #TARGETS += maxwidth TARGETS += mutex +TARGETS += mutex_prepost TARGETS += mutex_try TARGETS += once_cancel TARGETS += pthread_attr_setstacksize @@ -35,6 +39,7 @@ TARGETS += pthread_introspection TARGETS += pthread_setspecific TARGETS += pthread_threadid_np TARGETS += pthread_get_qos_class_np +TARGETS += pthread_dependency #TARGETS += qos TARGETS += rdar_32848402 #TARGETS += rwlock-22244050 @@ -47,6 +52,7 @@ TARGETS += tsd #TARGETS += wq_kevent_stress TARGETS += wq_limits TARGETS += add_timer_termination +TARGETS += perf_contended_mutex_rwlock OTHER_LTE_INCLUDE_FILES += \ /usr/local/lib/libdarwintest_utils.dylib @@ -54,7 +60,7 @@ OTHER_LTE_INCLUDE_FILES += \ OTHER_CFLAGS := -DDARWINTEST -Weverything \ -Wno-vla -Wno-bad-function-cast -Wno-missing-noreturn \ -Wno-missing-field-initializers -Wno-format-pedantic \ - -Wno-gnu-folding-constant + -Wno-gnu-folding-constant -Wno-used-but-marked-unused OTHER_LDFLAGS := -ldarwintest_utils #TARGETS += main_stack_legacy // Disabled by default due to linker warnings @@ -63,8 +69,8 @@ OTHER_LDFLAGS := -ldarwintest_utils #main_stack_legacy: ARCH_FLAGS = -arch i386 #main_stack_legacy: DEPLOYMENT_TARGET_FLAGS = -mmacosx-version-min=10.7 -main_stack_custom: OTHER_LDFLAGS += -Wl,-stack_size,0x14000 -main_stack_custom: OTHER_CFLAGS += -DSTACKSIZE=0x14000 +main_stack_custom: OTHER_LDFLAGS += -Wl,-stack_size,0x124000 +main_stack_custom: OTHER_CFLAGS += -DSTACKSIZE=0x124000 bsdthread_set_self: OTHER_CFLAGS += -D_DARWIN_FEATURE_CLOCK_GETTIME diff --git a/tests/cond_prepost.c b/tests/cond_prepost.c new file mode 100644 index 0000000..df8b86e --- /dev/null +++ b/tests/cond_prepost.c @@ -0,0 +1,217 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "darwintest_defaults.h" +#include + +// this test case is intended to test for the +// specific issue found in this radar. That is, if: +// +// 1. A mutex is in first-fit policy mode, and +// 2. is used as the mutex in a pthread_cond_wait (or timedwait), and +// 3. the mutex has the K-bit set but has no kernel waiters, and +// 4. the cvwait call preposts an unlock to the mutex +// +// Under these conditions, the fact that the cvwait preposted an unlock to +// the paired mutex is lost during the call. The P-bit was never returned to +// userspace and the kwq in the kernel would continue to exist. 
If the same +// uaddr is then reused as another synchroniser type then we would often +// return EINVAL from the wait/lock function. +// +// So this test is attempting to: +// +// 1. Repeatedly bang on a mutex+cvar for a number of iterations in the +// hope of triggering a cvwait prepost situation. +// 2. Then destroy both the mutex and cvar, and reinitialise each memory +// location as the opposite type of synchroniser. Then cvwait once to +// trigger the failure condition. + +struct context { + union { + pthread_mutex_t mutex; + pthread_cond_t cond; + }; + union { + pthread_mutex_t mutex2; + pthread_cond_t cond2; + }; + long value; + long count; + long waiter; +}; + +static void *test_cond(void *ptr) { + struct context *context = ptr; + int res; + + res = pthread_cond_wait(&context->cond, &context->mutex2); + T_ASSERT_POSIX_ZERO(res, "condition wait on condvar completed"); + res = pthread_mutex_unlock(&context->mutex2); + T_ASSERT_POSIX_ZERO(res, "unlock condvar mutex"); + return NULL; +} + +static void *test_cond_wake(void *ptr) { + struct context *context = ptr; + int res; + + res = pthread_mutex_lock(&context->mutex2); + T_ASSERT_POSIX_ZERO(res, "locked condvar mutex"); + res = pthread_cond_signal(&context->cond); + T_ASSERT_POSIX_ZERO(res, "condvar signalled"); + res = pthread_mutex_unlock(&context->mutex2); + T_ASSERT_POSIX_ZERO(res, "dropped condvar mutex"); + + return NULL; +} + +static void *test_thread(void *ptr) { + int res; + long old; + struct context *context = ptr; + + int i = 0; + char *str; + + do { + bool try = i++ & 1; + bool cond = i & 16; + + if (!try){ + str = "pthread_mutex_lock"; + res = pthread_mutex_lock(&context->mutex); + } else { + str = "pthread_mutex_trylock"; + res = pthread_mutex_trylock(&context->mutex); + } + if (res != 0) { + if (try && res == EBUSY) { + continue; + } + T_ASSERT_POSIX_ZERO(res, "[%ld] %s", context->count, str); + } + + old = __sync_fetch_and_or(&context->value, 1); + if ((old & 1) != 0) { + T_FAIL("[%ld] OR %lx\n", context->count, old); + } + + old = __sync_fetch_and_and(&context->value, 0); + if ((old & 1) == 0) { + T_FAIL("[%ld] AND %lx\n", context->count, old); + } + + if (cond && !context->waiter) { + context->waiter = 1; + struct timespec ts = { + .tv_sec = 0, + .tv_nsec = 10ull * NSEC_PER_MSEC, + }; + + res = pthread_cond_timedwait_relative_np(&context->cond2, &context->mutex, &ts); + if (res == ETIMEDOUT) { + // ignore, should be the last thread out + } else if (res) { + T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_cond_wait", + context->count); + } + context->waiter = 0; + res = pthread_mutex_unlock(&context->mutex); + if (res) { + T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_mutex_unlock", + context->count); + } + } else { + if (context->waiter) { + res = pthread_cond_broadcast(&context->cond2); + if (res) { + T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_cond_broadcast", + context->count); + } + } + res = pthread_mutex_unlock(&context->mutex); + if (res) { + T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_mutex_unlock", + context->count); + } + } + } while (__sync_fetch_and_sub(&context->count, 1) > 0); + return NULL; +} + + +static void +_test_condvar_prepost_race(void) +{ + struct context context = { + .mutex = PTHREAD_MUTEX_INITIALIZER, + .cond2 = PTHREAD_COND_INITIALIZER, + .value = 0, + .count = 10000, + .waiter = false, + }; + int i; + int res; + int threads = 8; + pthread_t p[threads]; + for (i = 0; i < threads; ++i) { + res = pthread_create(&p[i], NULL, test_thread, &context); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()"); + } + for 
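	// Phase one: the loop below starts `threads` workers that hammer
	// context->mutex with alternating lock/trylock and, every so often, a
	// short pthread_cond_timedwait_relative_np on cond2, trying to catch
	// the kernel with a preposted unlock on the first-fit mutex. Phase two
	// (after the joins) destroys both objects, re-initialises the same
	// memory as the opposite synchroniser types, and performs a single
	// cvwait; a kwq leaked by the prepost is where this historically
	// surfaced as EINVAL.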
(i = 0; i < threads; ++i) { + res = pthread_join(p[i], NULL); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()"); + } + + T_PASS("initial pthread mutex storm completed"); + + pthread_mutex_destroy(&context.mutex); + pthread_cond_destroy(&context.cond2); + + pthread_mutex_init(&context.mutex2, NULL); + pthread_cond_init(&context.cond, NULL); + res = pthread_mutex_lock(&context.mutex2); + T_ASSERT_POSIX_ZERO(res, "mutex lock for condition wait"); + res = pthread_create(&p[0], NULL, test_cond, &context); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()"); + res = pthread_create(&p[1], NULL, test_cond_wake, &context); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()"); + + res = pthread_join(p[0], NULL); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()"); + res = pthread_join(p[1], NULL); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()"); + + pthread_cond_destroy(&context.cond); +} + +T_DECL(cond_prepost_fairshare, "cond_prepost_fairshare (fairshare)", + T_META_ALL_VALID_ARCHS(YES), + T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=1")) +{ + int i; + int count = 100; + for (i=0; i < count; i++) { + _test_condvar_prepost_race(); + } +} + +T_DECL(cond_prepost_firstfit, "cond_prepost_firstfit (firstfit)", + T_META_ALL_VALID_ARCHS(YES), + T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=3")) +{ + int i; + int count = 100; + for (i=0; i < count; i++) { + _test_condvar_prepost_race(); + } +} diff --git a/tests/main_stack_custom.c b/tests/main_stack_custom.c index eb0d660..2e992a8 100644 --- a/tests/main_stack_custom.c +++ b/tests/main_stack_custom.c @@ -14,7 +14,7 @@ T_DECL(main_stack_custom, "tests the reported values for a custom main thread st struct rlimit lim; T_QUIET; T_ASSERT_POSIX_SUCCESS(getrlimit(RLIMIT_STACK, &lim), NULL); - lim.rlim_cur = lim.rlim_cur / 8; + lim.rlim_cur = lim.rlim_cur + 32 * PAGE_SIZE; T_EXPECT_EQ(setrlimit(RLIMIT_STACK, &lim), -1, "setrlimit for stack should fail with custom stack"); T_EXPECT_EQ((size_t)STACKSIZE, pthread_get_stacksize_np(pthread_self()), "reported stacksize shouldn't change"); } diff --git a/tests/mutex.c b/tests/mutex.c index 0b1e1d4..9fe0277 100644 --- a/tests/mutex.c +++ b/tests/mutex.c @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -104,7 +105,7 @@ check_process_default_mutex_policy(int expected_policy) T_DECL(mutex_default_policy, "Tests that the default mutex policy is fairshare") { - check_process_default_mutex_policy(_PTHREAD_MUTEX_POLICY_FAIRSHARE); + check_process_default_mutex_policy(_PTHREAD_MUTEX_POLICY_FIRSTFIT); } T_DECL(mutex_default_policy_sysctl, @@ -133,7 +134,7 @@ T_HELPER_DECL(mutex_default_policy_sysctl_helper, "sysctl helper") T_DECL(mutex_default_policy_envvar, "Tests that setting the policy environment variable changes the default policy", - T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=2")) + T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=3")) { check_process_default_mutex_policy(_PTHREAD_MUTEX_POLICY_FIRSTFIT); } diff --git a/tests/mutex_prepost.c b/tests/mutex_prepost.c new file mode 100644 index 0000000..6423e20 --- /dev/null +++ b/tests/mutex_prepost.c @@ -0,0 +1,157 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "darwintest_defaults.h" +#include + +struct context { + union { + pthread_mutex_t mutex; + pthread_cond_t cond; + }; + pthread_mutex_t mutex2; + long value; + long count; +}; + +static void *test_cond(void *ptr) { + struct context *context = ptr; + int res; + + res = pthread_cond_wait(&context->cond, &context->mutex2); + 
T_ASSERT_POSIX_ZERO(res, "condition wait on condvar completed"); + res = pthread_mutex_unlock(&context->mutex2); + T_ASSERT_POSIX_ZERO(res, "unlock condvar mutex"); + return NULL; +} + +static void *test_cond_wake(void *ptr) { + struct context *context = ptr; + int res; + + res = pthread_mutex_lock(&context->mutex2); + T_ASSERT_POSIX_ZERO(res, "locked condvar mutex"); + res = pthread_cond_signal(&context->cond); + T_ASSERT_POSIX_ZERO(res, "condvar signalled"); + res = pthread_mutex_unlock(&context->mutex2); + T_ASSERT_POSIX_ZERO(res, "dropped condvar mutex"); + + return NULL; +} + +static void *test_thread(void *ptr) { + int res; + long old; + struct context *context = ptr; + + int i = 0; + char *str; + + do { + bool try = i++ & 1; + + if (!try){ + str = "pthread_mutex_lock"; + res = pthread_mutex_lock(&context->mutex); + } else { + str = "pthread_mutex_trylock"; + res = pthread_mutex_trylock(&context->mutex); + } + if (res != 0) { + if (try && res == EBUSY) { + continue; + } + T_ASSERT_POSIX_ZERO(res, "[%ld] %s", context->count, str); + } + + old = __sync_fetch_and_or(&context->value, 1); + if ((old & 1) != 0) { + T_FAIL("[%ld] OR %lx\n", context->count, old); + } + + old = __sync_fetch_and_and(&context->value, 0); + if ((old & 1) == 0) { + T_FAIL("[%ld] AND %lx\n", context->count, old); + } + + res = pthread_mutex_unlock(&context->mutex); + if (res) { + T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_mutex_lock", context->count); + } + } while (__sync_fetch_and_sub(&context->count, 1) > 0); + return NULL; +} + + +static void +_test_condvar_prepost_race(void) +{ + struct context context = { + .mutex = PTHREAD_MUTEX_INITIALIZER, + .mutex2 = PTHREAD_MUTEX_INITIALIZER, + .value = 0, + .count = 1000, + }; + int i; + int res; + int threads = 8; + pthread_t p[threads]; + for (i = 0; i < threads; ++i) { + res = pthread_create(&p[i], NULL, test_thread, &context); + T_ASSERT_POSIX_ZERO(res, "pthread_create()"); + } + for (i = 0; i < threads; ++i) { + res = pthread_join(p[i], NULL); + T_ASSERT_POSIX_ZERO(res, "pthread_join()"); + } + + T_PASS("initial pthread mutex storm completed"); + + pthread_mutex_destroy(&context.mutex); + + pthread_cond_init(&context.cond, NULL); + res = pthread_mutex_lock(&context.mutex2); + T_ASSERT_POSIX_ZERO(res, "mutex lock for condition wait"); + res = pthread_create(&p[0], NULL, test_cond, &context); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()"); + res = pthread_create(&p[1], NULL, test_cond_wake, &context); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()"); + + res = pthread_join(p[0], NULL); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()"); + res = pthread_join(p[1], NULL); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()"); + + pthread_cond_destroy(&context.cond); +} + +T_DECL(mutex_prepost_fairshare, "pthread_mutex_prepost (fairshare)", + T_META_ALL_VALID_ARCHS(YES), + T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=1")) +{ + int i; + int count = 100; + for (i=0; i < count; i++) { + _test_condvar_prepost_race(); + } +} + +T_DECL(mutex_prepost_firstfit, "pthread_mutex_prepost (firstfit)", + T_META_ALL_VALID_ARCHS(YES), + T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=3")) +{ + int i; + int count = 100; + for (i=0; i < count; i++) { + _test_condvar_prepost_race(); + } +} diff --git a/tests/perf_contended_mutex_rwlock.c b/tests/perf_contended_mutex_rwlock.c new file mode 100644 index 0000000..e4219c5 --- /dev/null +++ b/tests/perf_contended_mutex_rwlock.c @@ -0,0 +1,519 @@ +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include + +// number of times the lock is taken per dt_stat batch +#define ITERATIONS_PER_DT_STAT_BATCH 10000ull +// number of times the contended mutex is taken per dt_stat batch +#define ITERATIONS_PER_DT_STAT_BATCH_CONTENDED_MUTEX 1000ull +// shift determining power of 2 factor of time spent by worker threads in the +// busy() function while outside of the lock vs inside the lock +#define OUTER_VS_INNER_SHIFT 4 +// fraction of read lock vs write lock acquires +#define RDLOCK_FRACTION 0.99f +// maintain and print progress counters in between measurement batches +#define COUNTERS 0 + +// move the darwintest assertion code out of the straight line execution path +// since it is has non-trivial overhead and codegen impact even if the assertion +// is never triggered. +#define iferr(_e) if(__builtin_expect(!!(_e), 0)) + +#pragma mark - + +uint64_t +random_busy_counts(unsigned int *seed, uint64_t *inner, uint64_t *outer) +{ + uint64_t random = rand_r(seed); + const uint64_t of = (1 << OUTER_VS_INNER_SHIFT); + *inner = 0x4 + (random & (0x10 - 1)); + *outer = 0x4 * of + ((random >> 4) & (0x10 * of - 1)); + return random; +} + +// By default busy() does cpu busy work for a passed in number of iterations +enum { + busy_is_nothing = 0, + busy_is_cpu_busy, + busy_is_cpu_yield, +}; +static int busy_select = busy_is_cpu_busy; + +static double +cpu_busy(uint64_t n) +{ + double d = M_PI; + uint64_t i; + for (i = 0; i < n; i++) d *= M_PI; + return d; +} + +static double +cpu_yield(uint64_t n) +{ + uint64_t i; + for (i = 0; i < n; i++) { +#if defined(__arm__) || defined(__arm64__) + asm volatile("yield"); +#elif defined(__x86_64__) || defined(__i386__) + asm volatile("pause"); +#else +#error Unrecognized architecture +#endif + } + return 0; +} + +__attribute__((noinline)) +static double +busy(uint64_t n) +{ + switch(busy_select) { + case busy_is_cpu_busy: + return cpu_busy(n); + case busy_is_cpu_yield: + return cpu_yield(n); + default: + return 0; + } +} + +#pragma mark - + +static semaphore_t ready_sem, start_sem, end_sem; +static uint32_t nthreads; +static _Atomic uint32_t active_thr; +static _Atomic int64_t todo; +uint64_t iterations_per_dt_stat_batch = ITERATIONS_PER_DT_STAT_BATCH; + +#if COUNTERS +static _Atomic uint64_t total_locks, total_rdlocks, total_wrlocks; +#define ctr_inc(_t) atomic_fetch_add_explicit(&(_t), 1, memory_order_relaxed) +#else +#define ctr_inc(_t) +#endif + +static uint32_t +ncpu(void) +{ + static uint32_t activecpu, physicalcpu; + if (!activecpu) { + uint32_t n; + size_t s = sizeof(n); + sysctlbyname("hw.activecpu", &n, &s, NULL, 0); + activecpu = n; + s = sizeof(n); + sysctlbyname("hw.physicalcpu", &n, &s, NULL, 0); + physicalcpu = n; + } + return MIN(activecpu, physicalcpu); +} + +__attribute__((noinline)) +static void +threaded_bench(dt_stat_time_t s, int batch_size) +{ + kern_return_t kr; + for (int i = 0; i < nthreads; i++) { + kr = semaphore_wait(ready_sem); + iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");} + } + atomic_init(&active_thr, nthreads); + atomic_init(&todo, batch_size * iterations_per_dt_stat_batch); + dt_stat_token t = dt_stat_begin(s); + kr = semaphore_signal_all(start_sem); + iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal_all");} + kr = semaphore_wait(end_sem); + iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");} + dt_stat_end_batch(s, batch_size, t); +} + +static void +setup_threaded_bench(void* (*thread_fn)(void*), bool singlethreaded) +{ + kern_return_t kr; + int r; + char *e; + + 
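	// Harness overview: worker threads are created detached and loop
	// forever; each iteration they rendezvous with threaded_bench() through
	// the ready/start/end semaphores and then decrement the shared `todo`
	// counter until the batch is drained. The thread count defaults to
	// MIN(hw.activecpu, hw.physicalcpu) via ncpu(), but DT_STAT_NTHREADS in
	// the environment overrides it, and DT_STAT_CPU_BUSY selects what the
	// threads do outside/inside the lock (0 nothing, 1 cpu_busy, 2
	// cpu_yield), matching the busy_is_* enum above.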
if (singlethreaded) { + nthreads = 1; + } else { + if ((e = getenv("DT_STAT_NTHREADS"))) nthreads = strtoul(e, NULL, 0); + if (nthreads < 2) nthreads = ncpu(); + } + if ((e = getenv("DT_STAT_CPU_BUSY"))) busy_select = strtoul(e, NULL, 0); + + kr = semaphore_create(mach_task_self(), &ready_sem, SYNC_POLICY_FIFO, 0); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create"); + kr = semaphore_create(mach_task_self(), &start_sem, SYNC_POLICY_FIFO, 0); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create"); + kr = semaphore_create(mach_task_self(), &end_sem, SYNC_POLICY_FIFO, 0); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create"); + + pthread_attr_t attr; + r = pthread_attr_init(&attr); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_attr_init"); + r = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_attr_setdetachstate"); + + for (int i = 0; i < nthreads; i++) { + pthread_t th; + r = pthread_create(&th, &attr, thread_fn, (void *)(uintptr_t)(i+1)); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_create"); + } +} + +#pragma mark - + +static pthread_mutex_t mutex; + +static void * +mutex_bench_thread(void * arg) +{ + kern_return_t kr; + int r; + unsigned int seed; + volatile double dummy; + +restart: + seed = (uintptr_t)arg; // each thread repeats its own sequence + kr = semaphore_wait_signal(start_sem, ready_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal"); + + while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) { + uint64_t inner, outer; + random_busy_counts(&seed, &inner, &outer); + dummy = busy(outer); + r = pthread_mutex_lock(&mutex); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_lock");} + dummy = busy(inner); + r = pthread_mutex_unlock(&mutex); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_unlock");} + ctr_inc(total_locks); + } + + if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) { + kr = semaphore_signal(end_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal"); + } + goto restart; +} + +static void +mutex_bench(bool singlethreaded) +{ + int r; + int batch_size; +#if COUNTERS + uint64_t batch = 0; +#endif + + setup_threaded_bench(mutex_bench_thread, singlethreaded); + + pthread_mutexattr_t attr; + r = pthread_mutexattr_init(&attr); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_init"); + pthread_mutexattr_setpolicy_np(&attr, _PTHREAD_MUTEX_POLICY_FAIRSHARE); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_setpolicy_np"); + r = pthread_mutex_init(&mutex, &attr); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_init"); + + dt_stat_time_t s = dt_stat_time_create("%llu pthread_mutex_lock & " + "pthread_mutex_unlock (fairshare) on %u thread%s", + iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? 
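	// Measurement loop: each pass asks libdarwintest for a batch size,
	// loads `todo` with batch_size * iterations_per_dt_stat_batch
	// lock/unlock pairs, releases the workers, and times the whole batch
	// between dt_stat_begin and dt_stat_end_batch, repeating until
	// dt_stat_stable() says the statistic has converged. This instance
	// pins the mutex to _PTHREAD_MUTEX_POLICY_FAIRSHARE so it can be
	// compared against the first-fit variant (ffmutex_bench) further down.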
"s" : ""); + do { + batch_size = dt_stat_batch_size(s); + threaded_bench(s, batch_size); +#if COUNTERS + fprintf(stderr, "\rbatch: %4llu\t size: %4d\tmutexes: %8llu", + ++batch, batch_size, + atomic_load_explicit(&total_locks, memory_order_relaxed)); +#endif + } while (!dt_stat_stable(s)); +#if COUNTERS + fprintf(stderr, "\n"); +#endif + dt_stat_finalize(s); +} + +T_DECL(perf_uncontended_mutex_bench, "Uncontended fairshare mutex", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + mutex_bench(true); +} + +T_DECL(perf_contended_mutex_bench, "Contended fairshare mutex", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + iterations_per_dt_stat_batch = ITERATIONS_PER_DT_STAT_BATCH_CONTENDED_MUTEX; + mutex_bench(false); +} + +#pragma mark - + +static pthread_rwlock_t rwlock; + +static void * +rwlock_bench_thread(void * arg) +{ + kern_return_t kr; + int r; + unsigned int seed; + volatile double dummy; + const uint64_t rand_rdlock_max = (double)RAND_MAX * RDLOCK_FRACTION; + +restart: + seed = (uintptr_t)arg; // each thread repeats its own sequence + kr = semaphore_wait_signal(start_sem, ready_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal"); + + while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) { + uint64_t inner, outer; + uint64_t random = random_busy_counts(&seed, &inner, &outer); + dummy = busy(outer); + if (random < rand_rdlock_max) { + r = pthread_rwlock_rdlock(&rwlock); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_rdlock");} + dummy = busy(inner); + r = pthread_rwlock_unlock(&rwlock); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_unlock");} + ctr_inc(total_rdlocks); + } else { + r = pthread_rwlock_wrlock(&rwlock); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_wrlock");} + dummy = busy(inner); + r = pthread_rwlock_unlock(&rwlock); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_unlock");} + ctr_inc(total_wrlocks); + } + ctr_inc(total_locks); + } + + if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) { + kr = semaphore_signal(end_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal"); + } + goto restart; +} + +static void +rwlock_bench(bool singlethreaded) +{ + int r; + int batch_size; +#if COUNTERS + uint64_t batch = 0; +#endif + + setup_threaded_bench(rwlock_bench_thread, singlethreaded); + + r = pthread_rwlock_init(&rwlock, NULL); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_init"); + + dt_stat_time_t s = dt_stat_time_create("%llu pthread_rwlock_rd/wrlock & " + "pthread_rwlock_unlock (%.0f%% rdlock) on %u thread%s", + iterations_per_dt_stat_batch, RDLOCK_FRACTION * 100, nthreads, + nthreads > 1 ? 
"s" : ""); + do { + batch_size = dt_stat_batch_size(s); + threaded_bench(s, batch_size); +#if COUNTERS + fprintf(stderr, "\rbatch: %4llu\t size: %4d\trwlocks: %8llu\t" + "rd: %8llu\twr: %8llu", ++batch, batch_size, + atomic_load_explicit(&total_locks, memory_order_relaxed), + atomic_load_explicit(&total_rdlocks, memory_order_relaxed), + atomic_load_explicit(&total_wrlocks, memory_order_relaxed)); +#endif + } while (!dt_stat_stable(s)); +#if COUNTERS + fprintf(stderr, "\n"); +#endif + dt_stat_finalize(s); +} + +T_DECL(perf_uncontended_rwlock_bench, "Uncontended rwlock", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + rwlock_bench(true); +} + +T_DECL(perf_contended_rwlock_bench, "Contended rwlock", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + rwlock_bench(false); +} + +#pragma mark - + +static os_unfair_lock unfair_lock; + +static void * +unfair_lock_bench_thread(void * arg) +{ + kern_return_t kr; + unsigned int seed; + volatile double dummy; + +restart: + seed = (uintptr_t)arg; // each thread repeats its own sequence + kr = semaphore_wait_signal(start_sem, ready_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal"); + + while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) { + uint64_t inner, outer; + random_busy_counts(&seed, &inner, &outer); + dummy = busy(outer); + os_unfair_lock_lock(&unfair_lock); + dummy = busy(inner); + os_unfair_lock_unlock(&unfair_lock); + ctr_inc(total_locks); + } + + if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) { + kr = semaphore_signal(end_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal"); + } + goto restart; +} + +static void +unfair_lock_bench(bool singlethreaded) +{ + int r; + int batch_size; +#if COUNTERS + uint64_t batch = 0; +#endif + + setup_threaded_bench(unfair_lock_bench_thread, singlethreaded); + + dt_stat_time_t s = dt_stat_time_create("%llu os_unfair_lock_lock & " + "os_unfair_lock_unlock on %u thread%s", + iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? 
"s" : ""); + do { + batch_size = dt_stat_batch_size(s); + threaded_bench(s, batch_size); +#if COUNTERS + fprintf(stderr, "\rbatch: %4llu\t size: %4d\tunfair_locks: %8llu", + ++batch, batch_size, + atomic_load_explicit(&total_locks, memory_order_relaxed)); +#endif + } while (!dt_stat_stable(s)); +#if COUNTERS + fprintf(stderr, "\n"); +#endif + dt_stat_finalize(s); +} + +T_DECL(perf_uncontended_unfair_lock_bench, "Unontended unfair lock", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + unfair_lock_bench(true); +} + +T_DECL(perf_contended_unfair_lock_bench, "Contended unfair lock", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + unfair_lock_bench(false); +} + +#pragma mark - + +static pthread_mutex_t ffmutex; + +static void * +ffmutex_bench_thread(void * arg) +{ + kern_return_t kr; + int r; + unsigned int seed; + volatile double dummy; + +restart: + seed = (uintptr_t)arg; // each thread repeats its own sequence + kr = semaphore_wait_signal(start_sem, ready_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal"); + + while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) { + uint64_t inner, outer; + random_busy_counts(&seed, &inner, &outer); + dummy = busy(outer); + r = pthread_mutex_lock(&ffmutex); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_lock");} + dummy = busy(inner); + r = pthread_mutex_unlock(&ffmutex); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_unlock");} + ctr_inc(total_locks); + } + + if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) { + kr = semaphore_signal(end_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal"); + } + goto restart; +} + +static void +ffmutex_bench(bool singlethreaded) +{ + int r; + int batch_size; +#if COUNTERS + uint64_t batch = 0; +#endif + + setup_threaded_bench(ffmutex_bench_thread, singlethreaded); + + pthread_mutexattr_t attr; + r = pthread_mutexattr_init(&attr); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_init"); + pthread_mutexattr_setpolicy_np(&attr, _PTHREAD_MUTEX_POLICY_FIRSTFIT); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_setpolicy_np"); + r = pthread_mutex_init(&ffmutex, &attr); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_init"); + + dt_stat_time_t s = dt_stat_time_create("%llu pthread_mutex_lock & " + "pthread_mutex_unlock (first-fit) on %u thread%s", + iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? 
"s" : ""); + do { + batch_size = dt_stat_batch_size(s); + threaded_bench(s, batch_size); +#if COUNTERS + fprintf(stderr, "\rbatch: %4llu\t size: %4d\tffmutexes: %8llu", + ++batch, batch_size, + atomic_load_explicit(&total_locks, memory_order_relaxed)); +#endif + } while (!dt_stat_stable(s)); +#if COUNTERS + fprintf(stderr, "\n"); +#endif + dt_stat_finalize(s); +} + +T_DECL(perf_uncontended_ffmutex_bench, "Uncontended first-fit mutex", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + ffmutex_bench(true); +} + +T_DECL(perf_contended_ffmutex_bench, "Contended first-fit mutex", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + ffmutex_bench(false); +} diff --git a/tests/pthread_dependency.c b/tests/pthread_dependency.c new file mode 100644 index 0000000..a6fd316 --- /dev/null +++ b/tests/pthread_dependency.c @@ -0,0 +1,78 @@ +#include "darwintest_defaults.h" +#include +#include + +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; + +static struct job { + pthread_dependency_t *req; + useconds_t usleep; + int done; +} job; + +static void * +do_test(void *__unused arg) +{ + pthread_mutex_lock(&mutex); + + while (!job.done) { + while (job.req == 0) { + pthread_cond_wait(&cond, &mutex); + } + if (job.usleep) usleep(job.usleep); + pthread_dependency_fulfill_np(job.req, job.req); + job.req = NULL; + } + + pthread_mutex_unlock(&mutex); + return NULL; +} + +static void +post_req(pthread_dependency_t *req, useconds_t delay, bool done) +{ + pthread_mutex_lock(&mutex); + job.req = req; + job.usleep = delay; + job.done = done; + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); +} + +T_DECL(dependency, "dependency", T_META_ALL_VALID_ARCHS(YES)) +{ + pthread_dependency_t req; + pthread_t pth; + void *v; + int ret; + + T_ASSERT_POSIX_ZERO(pthread_create(&pth, NULL, do_test, NULL), NULL); + + T_LOG("Waiting on a pdependency that takes some time"); + + pthread_dependency_init_np(&req, pth, NULL); + post_req(&req, 100000, false); + v = pthread_dependency_wait_np(&req); + T_EXPECT_EQ(v, &req, "pthread_dependency_wait worked"); + + T_LOG("Waiting on a pdependency that is already fulfilled"); + + pthread_dependency_init_np(&req, pth, NULL); + post_req(&req, 0, false); + usleep(100000); + v = pthread_dependency_wait_np(&req); + T_EXPECT_EQ(v, &req, "pthread_dependency_wait worked"); + + T_LOG("Waiting on a fulfilled pdependency with the other thread exiting"); + + pthread_dependency_init_np(&req, pth, NULL); + post_req(&req, 0, true); + ret = pthread_join(pth, NULL); + T_EXPECT_POSIX_ZERO(ret, "pthread_join"); + + v = pthread_dependency_wait_np(&req); + T_EXPECT_EQ(v, &req, "pthread_dependency_wait worked"); + + T_END; +} diff --git a/tests/pthread_threadid_np.c b/tests/pthread_threadid_np.c index d28ca65..19cfc25 100644 --- a/tests/pthread_threadid_np.c +++ b/tests/pthread_threadid_np.c @@ -9,7 +9,7 @@ extern __uint64_t __thread_selfid( void ); static void *do_test(void * __unused arg) { uint64_t threadid = __thread_selfid(); - T_ASSERT_NOTNULL(threadid, NULL); + T_ASSERT_NE(threadid, (uint64_t)0, "__thread_selfid()"); uint64_t pth_threadid = 0; T_ASSERT_POSIX_ZERO(pthread_threadid_np(NULL, &pth_threadid), NULL); diff --git a/tests/rdar_32848402.c b/tests/rdar_32848402.c index 65cd56e..068836a 100644 --- a/tests/rdar_32848402.c +++ b/tests/rdar_32848402.c @@ -72,7 +72,9 @@ T_DECL(thread_request_32848402, "repro for 
rdar://32848402") end_spin = clock_gettime_nsec_np(CLOCK_MONOTONIC) + 2 * NSEC_PER_SEC; dispatch_async_f(a, (void *)0, spin_and_pause); - for (long i = 1; i < get_ncpu(); i++) { + long n_threads = MIN((long)get_ncpu(), + pthread_qos_max_parallelism(QOS_CLASS_BACKGROUND, 0)); + for (long i = 1; i < n_threads; i++) { dispatch_async_f(b, (void *)i, spin); } diff --git a/tests/stack.c b/tests/stack.c new file mode 100644 index 0000000..f910b28 --- /dev/null +++ b/tests/stack.c @@ -0,0 +1,82 @@ +#include +#include + +#include "darwintest_defaults.h" +#include + +#if defined(__arm64__) +#define call_chkstk(value) \ + __asm__ volatile("orr x9, xzr, %0\t\n" \ + "bl _thread_chkstk_darwin" : : "i"(value) : "x9") +#define TRAPSIG SIGTRAP +#elif defined(__x86_64__) +#define call_chkstk(value) \ + __asm__ volatile("movq %0, %%rax\t\n" \ + "callq _thread_chkstk_darwin" : : "i"(value) : "rax") +#define TRAPSIG SIGILL +#elif defined(__i386__) +#define call_chkstk(value) \ + __asm__ volatile("movl %0, %%eax\t\n" \ + "calll _thread_chkstk_darwin" : : "i"(value) : "eax") +#define TRAPSIG SIGILL +#endif + +static void +got_signal(int signo __unused) +{ + T_PASS("calling with 1 << 24 crashed"); + T_END; +} + +T_DECL(chkstk, "chkstk", + T_META_ALL_VALID_ARCHS(YES), T_META_CHECK_LEAKS(NO)) +{ +#if defined(__arm__) + T_SKIP("not on armv7"); +#else + + call_chkstk(1 << 8); + T_PASS("calling with 1 << 8"); + + call_chkstk(1 << 16); + T_PASS("calling with 1 << 16"); + + signal(TRAPSIG, got_signal); + + call_chkstk(1 << 24); + T_FAIL("should have crashed"); +#endif +} + +struct frame { + uintptr_t frame; + uintptr_t ret; +}; + +OS_NOINLINE OS_NOT_TAIL_CALLED +static void +do_stack_frame_decode_test(struct frame frames[], size_t n, size_t count) +{ + if (n < count) { + frames[n].frame = (uintptr_t)__builtin_frame_address(1); + frames[n].ret = (uintptr_t)__builtin_return_address(0); + do_stack_frame_decode_test(frames, n + 1, count); + } else { + uintptr_t frame = (uintptr_t)__builtin_frame_address(1); + uintptr_t ret; + while (count-- > 0) { + frame = pthread_stack_frame_decode_np(frame, &ret); + T_EXPECT_EQ(frames[count].frame, frame, "Frame %zd", count); + T_EXPECT_EQ(frames[count].ret, ret, "Retaddr %zd", count); + } + } +} + +T_DECL(pthread_stack_frame_decode_np, "pthread_stack_frame_decode_np", + T_META_ALL_VALID_ARCHS(YES), T_META_CHECK_LEAKS(NO)) +{ + struct frame frames[10]; + frames[0].frame = (uintptr_t)__builtin_frame_address(1); + frames[0].ret = (uintptr_t)__builtin_return_address(0); + do_stack_frame_decode_test(frames, 1, 10); +} diff --git a/tests/stack_aslr.c b/tests/stack_aslr.c index a8dab42..aaf483e 100644 --- a/tests/stack_aslr.c +++ b/tests/stack_aslr.c @@ -133,7 +133,7 @@ again: for (int i = 0; i < attempts; i++) { char *t; - asprintf(&t, "%s/%zd", tmp, i); + asprintf(&t, "%s/%d", tmp, i); T_QUIET; T_ASSERT_POSIX_SUCCESS(mkdir(t, 0700), "mkdir"); setenv("BATS_TMP_DIR", t, 1); // hack to workaround rdar://33443485 free(t); @@ -144,7 +144,7 @@ again: T_QUIET; T_FAIL("Helper should complete in <.1s"); goto timeout; } - usleep(1000); + usleep(1000 * 100); } while (shmem->done <= i); } setenv("BATS_TMP_DIR", tmpdir, 1); diff --git a/tests/stack_size.c b/tests/stack_size.c new file mode 100644 index 0000000..3a52747 --- /dev/null +++ b/tests/stack_size.c @@ -0,0 +1,81 @@ +#include +#include +#include +#include "darwintest_defaults.h" + +#define PTHREAD_T_OFFSET (0) + +static void * +function(void *arg) +{ + size_t expected_size = (size_t)(uintptr_t)arg; + 
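	// Each stack_size test passes the size it expects
	// pthread_get_stacksize_np() to report through the thread argument, so
	// this helper only has to compare. PTHREAD_T_OFFSET (zero here) looks
	// like a knob for configurations where the reported size also covers
	// space carved out of the allocation for the thread itself; the
	// custom-address cases hand pthread_attr_setstackaddr the top of the
	// valloc'd region (base + size), since stacks grow down.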
T_ASSERT_EQ(pthread_get_stacksize_np(pthread_self()), expected_size, + "saw expected pthread_get_stacksize_np"); + return NULL; +} + +T_DECL(stack_size_default, "stack size of default pthread", + T_META_ALL_VALID_ARCHS(YES)) +{ + static const size_t dflsize = 512 * 1024; + pthread_t thread; + pthread_attr_t attr; + + T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL); + T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function, + (void *)(dflsize + PTHREAD_T_OFFSET)), NULL); + T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL); +} + +T_DECL(stack_size_customsize, "stack size of thread with custom stack size", + T_META_ALL_VALID_ARCHS(YES)) +{ + static const size_t stksize = 768 * 1024; + pthread_t thread; + pthread_attr_t attr; + + T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL); + T_ASSERT_POSIX_ZERO(pthread_attr_setstacksize(&attr, stksize), NULL); + T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function, + (void *)(stksize + PTHREAD_T_OFFSET)), NULL); + T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL); +} + +T_DECL(stack_size_customaddr, "stack size of thread with custom stack addr", + T_META_ALL_VALID_ARCHS(YES)) +{ + static const size_t stksize = 512 * 1024; + pthread_t thread; + pthread_attr_t attr; + + uintptr_t stackaddr = (uintptr_t)valloc(stksize); + stackaddr += stksize; // address is top of stack + + T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL); + T_ASSERT_POSIX_ZERO(pthread_attr_setstackaddr(&attr, (void *)stackaddr), + NULL); + T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function, + (void *)stksize), NULL); + T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL); + free((void *)(stackaddr - stksize)); +} + +T_DECL(stack_size_custom, "stack size of thread with custom stack addr+size", + T_META_ALL_VALID_ARCHS(YES)) +{ + static const size_t stksize = 768 * 1024; + pthread_t thread; + pthread_attr_t attr; + + uintptr_t stackaddr = (uintptr_t)valloc(stksize); + stackaddr += stksize; // address is top of stack + + T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL); + T_ASSERT_POSIX_ZERO(pthread_attr_setstackaddr(&attr, (void *)stackaddr), + NULL); + T_ASSERT_POSIX_ZERO(pthread_attr_setstacksize(&attr, stksize), NULL); + T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function, + (void *)stksize), NULL); + T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL); + free((void *)(stackaddr - stksize)); +} diff --git a/tools/locktrace.lua b/tools/locktrace.lua index ecc64bc..bb5380d 100755 --- a/tools/locktrace.lua +++ b/tools/locktrace.lua @@ -28,10 +28,26 @@ get_prefix = function(buf) local proc proc = buf.command - return string.format("%s %6.9f %-17s [%05d.%06x] %-24s", + return string.format("%s %6.9f %-17s [%05d.%06x] %-35s", prefix, secs, proc, buf.pid, buf.threadid, buf.debugname) end +get_count = function(val) + return ((val & 0xffffff00) >> 8) +end + +get_kwq_type = function(val) + if val & 0xff == 0x1 then + return "MTX" + elseif val & 0xff == 0x2 then + return "CVAR" + elseif val & 0xff == 0x4 then + return "RWL" + else + return string.format("0x%04x", val) + end +end + decode_lval = function(lval) local kbit = " " if lval & 0x1 ~= 0 then @@ -61,61 +77,282 @@ decode_sval = function(sval) end local count = sval >> 8 - return string.format("[0x%06x, %s%s]", count, ibit, sbit) + return string.format("[0x%06x, %s%s]", count, ibit, sbit) +end + +decode_cv_sval = function(sval) + local sbit = " " + if sval & 0x1 ~= 0 then + sbit = "C" + end + local ibit = " " + if sval & 0x2 ~= 0 then + ibit = "P" + end + + local count = sval >> 8 + return 
string.format("[0x%06x, %s%s]", count, ibit, sbit) end trace_codename("psynch_mutex_lock_updatebits", function(buf) local prefix = get_prefix(buf) if buf[4] == 0 then - printf("%s\tupdated lock bits, pre-kernel (addr: 0x%016x, oldlval: %s, newlval: %s)\n", prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3])) + printf("%s\tupdated lock bits, pre-kernel\taddr: 0x%016x\toldl: %s\tnewl: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3])) else - printf("%s\tupdated lock bits, post-kernel (addr: 0x%016x, oldlval: %s, newlval: %s)\n", prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3])) + printf("%s\tupdated lock bits, post-kernel\taddr: 0x%016x\toldl: %s\tnewl: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3])) end end) trace_codename("psynch_mutex_unlock_updatebits", function(buf) local prefix = get_prefix(buf) - printf("%s\tupdated unlock bits (addr: 0x%016x, oldlval: %s, newlval: %s)\n", prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3])) + printf("%s\tupdated unlock bits\t\taddr: 0x%016x\toldl: %s\tnewl: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3])) +end) + +trace_codename("psynch_ffmutex_lock_updatebits", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tlock path, bits update\t\taddr: 0x%016x\toldl: %s\toldu: %s\twaiters: %d\n", + prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), get_count(buf[2]) - get_count(buf[3])) + else + printf("%s\tlock path, bits update\t\taddr: 0x%016x\tnewl: %s\tnewu: %s\twaiters: %d\n", + prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), get_count(buf[2]) - get_count(buf[3])) + end +end) + +trace_codename("psynch_ffmutex_unlock_updatebits", function(buf) + local prefix = get_prefix(buf) + printf("%s\tunlock path, update bits\taddr: 0x%016x\toldl: %s\tnewl: %s\tnewu: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), decode_sval(buf[4])) +end) + +trace_codename("psynch_ffmutex_wake", function(buf) + local prefix = get_prefix(buf) + printf("%s\tfirst fit kernel wake\t\taddr: 0x%016x\tlval: %s\tuval: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3])) +end) + +trace_codename("psynch_ffmutex_wait", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tfirstfit kernel wait\t\taddr: 0x%016x\tlval: %s\tuval: %s\tflags: 0x%x\n", + prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4]) + else + printf("%s\tfirstfit kernel wait\t\taddr: 0x%016x\trval: %s\n", + prefix, buf[1], decode_lval(buf[2])) + end end) trace_codename("psynch_mutex_ulock", function(buf) local prefix = get_prefix(buf) if trace.debugid_is_start(buf.debugid) then - printf("%s\tlock busy, waiting in kernel (addr: 0x%016x, lval: %s, sval: %s, owner_tid: 0x%x)\n", + printf("%s\tlock busy, waiting in kernel\taddr: 0x%016x\tlval: %s\tsval: %s\towner_tid: 0x%x\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4]) elseif trace.debugid_is_end(buf.debugid) then - printf("%s\tlock acquired from kernel (addr: 0x%016x, updated bits: %s)\n", + printf("%s\tlock acquired from kernel\taddr: 0x%016x\tupdt: %s\n", prefix, buf[1], decode_lval(buf[2])) else - printf("%s\tlock taken, uncontended (addr: 0x%016x, lval: %s, sval: %s)\n", + printf("%s\tlock taken userspace\t\taddr: 0x%016x\tlval: %s\tsval: %s\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3])) end end) trace_codename("psynch_mutex_utrylock_failed", function(buf) local prefix = get_prefix(buf) - 
printf("%s\tmutex trybusy addr: 0x%016x lval: %s sval: %s owner: 0x%x\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4]) + printf("%s\tmutex trybusy\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\towner: 0x%x\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4]) end) trace_codename("psynch_mutex_uunlock", function(buf) local prefix = get_prefix(buf) if trace.debugid_is_start(buf.debugid) then - printf("%s\tunlock, signalling kernel waiters (addr: 0x%016x, lval: %s, sval: %s, owner_tid: 0x%x)\n", + printf("%s\tunlock, signalling kernel\taddr: 0x%016x\tlval: %s\tsval: %s\towner_tid: 0x%x\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4]) elseif trace.debugid_is_end(buf.debugid) then - printf("%s\tunlock, waiters signalled (addr: 0x%016x, updated bits: %s)\n", + printf("%s\tunlock, waiters signalled\taddr: 0x%016x\tupdt: %s\n", prefix, buf[1], decode_lval(buf[2])) else - printf("%s\tunlock, no kernel waiters (addr: 0x%016x, lval: %s, sval: %s)\n", + printf("%s\tunlock, no kernel waiters\taddr: 0x%016x\tlval: %s\tsval: %s\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3])) end end) --- The trace codes we need aren't enabled by default -darwin.sysctlbyname("kern.pthread_debug_tracing", 1) -completion_handler = function() - darwin.sysctlbyname("kern.pthread_debug_tracing", 0) -end -trace.set_completion_handler(completion_handler) +trace_codename("psynch_mutex_clearprepost", function(buf) + local prefix = get_prefix(buf) + printf("%s\tclear prepost\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3])) +end) + +trace_codename("psynch_mutex_markprepost", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tmark prepost\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3])) + else + printf("%s\tmark prepost\t\t\taddr: 0x%016x\tcleared: %d\n", + prefix, buf[1], buf[2]) + end +end) + +trace_codename("psynch_mutex_kwqallocate", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tkernel kwq allocated\t\taddr: 0x%016x\ttype: %s\tkwq: 0x%016x\n", + prefix, buf[1], get_kwq_type(buf[2]), buf[3]) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tkernel kwq allocated\t\taddr: 0x%016x\tlval: %s\tuval: %s\tsval: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), decode_sval(buf[4])) + end +end) + +trace_codename("psynch_mutex_kwqdeallocate", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tkernel kwq deallocated\t\taddr: 0x%016x\ttype: %s\tfreenow: %d\n", + prefix, buf[1], get_kwq_type(buf[2]), buf[3]) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tkernel kwq deallocated\t\taddr: 0x%016x\tlval: %s\tuval: %s\tsval: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), decode_sval(buf[4])) + end +end) + +trace_codename("psynch_mutex_kwqprepost", function(buf) + local prefix = get_prefix(buf) + if buf[4] == 0 then + printf("%s\tkernel prepost incremented\taddr: 0x%016x\tlval: %s\tinqueue: %d\n", + prefix, buf[1], decode_lval(buf[2]), buf[3]) + elseif buf[4] == 1 then + printf("%s\tkernel prepost decremented\taddr: 0x%016x\tlval: %s\tremaining: %d\n", + prefix, buf[1], decode_lval(buf[2]), buf[3]) + elseif buf[4] == 2 then + printf("%s\tkernel prepost cleared\t\taddr: 0x%016x\tlval: %s\n", prefix, + buf[1], decode_lval(buf[2])) + end +end) + 
+trace_codename("psynch_mutex_kwqcollision", function(buf) + local prefix = get_prefix(buf) + printf("%s\tkernel kwq collision\t\taddr: 0x%016x\ttype: %d\n", prefix, + buf[1], buf[2]) +end) + +trace_codename("psynch_mutex_kwqsignal", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tkernel mutex signal\t\taddr: 0x%016x\tkwe: 0x%16x\ttid: 0x%x\tinqueue: %d\n", + prefix, buf[1], buf[2], buf[3], buf[4]); + else + printf("%s\tkernel mutex signal\t\taddr: 0x%016x\tkwe: 0x%16x\tret: 0x%x\n", + prefix, buf[1], buf[2], buf[3]); + end +end) + +trace_codename("psynch_mutex_kwqwait", function(buf) + local prefix = get_prefix(buf) + printf("%s\tkernel mutex wait\t\taddr: 0x%016x\tinqueue: %d\tprepost: %d\tintr: %d\n", + prefix, buf[1], buf[2], buf[3], buf[4]) +end) + +trace_codename("psynch_cvar_kwait", function(buf) + local prefix = get_prefix(buf) + if buf[4] == 0 then + printf("%s\tkernel condvar wait\t\taddr: 0x%016x\tmutex: 0x%016x\tcgen: 0x%x\n", + prefix, buf[1], buf[2], buf[3]) + elseif buf[4] == 1 then + printf("%s\tkernel condvar sleep\t\taddr: 0x%016x\tflags: 0x%x\n", + prefix, buf[1], buf[3]) + elseif buf[4] == 2 then + printf("%s\tkernel condvar wait return\taddr: 0x%016x\terror: 0x%x\tupdt: 0x%x\n", + prefix, buf[1], buf[2], buf[3]) + elseif buf[4] == 3 and (buf[2] & 0xff) == 60 then + printf("%s\tkernel condvar timeout\t\taddr: 0x%016x\terror: 0x%x\n", + prefix, buf[1], buf[2]) + elseif buf[4] == 3 then + printf("%s\tkernel condvar wait error\taddr: 0x%016x\terror: 0x%x\n", + prefix, buf[1], buf[2]) + elseif buf[4] == 4 then + printf("%s\tkernel condvar wait return\taddr: 0x%016x\tupdt: 0x%x\n", + prefix, buf[1], buf[2]) + end +end) + +trace_codename("psynch_cvar_clrprepost", function(buf) + local prefix = get_prefix(buf) + printf("%s\tkernel condvar clear prepost:\taddr: 0x%016x\ttype: 0x%x\tprepost seq: %s\n", + prefix, buf[1], buf[2], decode_lval(buf[3])) +end) + +trace_codename("psynch_cvar_freeitems", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tcvar free fake/prepost items\taddr: 0x%016x\ttype: %d\t\t\tupto: %s\tall: %d\n", + prefix, buf[1], buf[2], decode_lval(buf[3]), buf[4]) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tcvar free fake/prepost items\taddr: 0x%016x\tfreed: %d\tsignaled: %d\tinqueue: %d\n", + prefix, buf[1], buf[2], buf[3], buf[4]) + elseif buf[4] == 1 then + printf("%s\tcvar free, signalling waiter\taddr: 0x%016x\tinqueue: %d\tkwe: 0x%016x\n", + prefix, buf[1], buf[3], buf[2]) + elseif buf[4] == 2 then + printf("%s\tcvar free, removing fake\taddr: 0x%016x\tinqueue: %d\tkwe: 0x%016x\n", + prefix, buf[1], buf[3], buf[2]) + end +end) + +trace_codename("psynch_cvar_signal", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tkernel cvar signal\t\taddr: 0x%016x\tfrom: %s\tupto: %s\tbroad: %d\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), buf[4]) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tkernel cvar signal\t\taddr: 0x%016x\tupdt: %s\n", + prefix, buf[1], decode_cv_sval(buf[2])) + else + printf("%s\tkernel cvar signal\t\taddr: 0x%016x\tsignalled waiters (converted to broadcast: %d)\n", + prefix, buf[1], buf[2]) + end +end) + +trace_codename("psynch_cvar_broadcast", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tupto: %s\tinqueue: %d\n", + prefix, 
buf[1], decode_lval(buf[2]), buf[3]) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tupdt: %s\n", + prefix, buf[1], decode_lval(buf[2])) + elseif buf[4] == 1 then + printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tsignalling: 0x%16x\n", + prefix, buf[1], buf[2]) + elseif buf[4] == 2 then + printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tremoving fake: 0x%16x\tstate: %d\n", + prefix, buf[1], buf[2], buf[3]) + elseif buf[4] == 3 then + printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tprepost\tlval: %s\tsval: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_cv_sval(buf[3])) + elseif buf[4] == 4 then + printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tbroadcast prepost: 0x%016x\n", + prefix, buf[1], buf[2]) + end +end) + +trace_codename("psynch_cvar_zeroed", function(buf) + local prefix = get_prefix(buf) + printf("%s\tkernel cvar zeroed\t\taddr: 0x%016x\tlval: %s\tsval: %s\tinqueue: %d\n", + prefix, buf[1], decode_lval(buf[2]), decode_cv_sval(buf[3]), buf[4]) +end) + +trace_codename("psynch_cvar_updateval", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tcvar updateval\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\tupdateval: %s\n", + prefix, buf[1], decode_lval(buf[2] & 0xffffffff), decode_cv_sval(buf[2] >> 32), decode_cv_sval(buf[3])) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tcvar updateval (updated)\taddr: 0x%016x\tlval: %s\tsval: %s\tdiffgen: %d\tneedsclear: %d\n", + prefix, buf[1], decode_lval(buf[2] & 0xffffffff), decode_cv_sval(buf[2] >> 32), buf[3] >> 32, buf[3] & 0x1) + end +end) + diff --git a/tools/wqtrace.lua b/tools/wqtrace.lua deleted file mode 100755 index 2da03da..0000000 --- a/tools/wqtrace.lua +++ /dev/null @@ -1,349 +0,0 @@ -#!/usr/local/bin/luatrace -s - -trace_codename = function(codename, callback) - local debugid = trace.debugid(codename) - if debugid ~= 0 then - trace.single(debugid,callback) - else - printf("WARNING: Cannot locate debugid for '%s'\n", codename) - end -end - -initial_timestamp = 0 -workqueue_ptr_map = {}; -get_prefix = function(buf) - if initial_timestamp == 0 then - initial_timestamp = buf.timestamp - end - local secs = trace.convert_timestamp_to_nanoseconds(buf.timestamp - initial_timestamp) / 1000000000 - - local prefix - if trace.debugid_is_start(buf.debugid) then - prefix = "→" - elseif trace.debugid_is_end(buf.debugid) then - prefix = "←" - else - prefix = "↔" - end - - local proc - if buf.command ~= "kernel_task" then - proc = buf.command - workqueue_ptr_map[buf[1]] = buf.command - elseif workqueue_ptr_map[buf[1]] ~= nil then - proc = workqueue_ptr_map[buf[1]] - else - proc = "UNKNOWN" - end - - return string.format("%s %6.9f %-17s [%05d.%06x] %-24s", - prefix, secs, proc, buf.pid, buf.threadid, buf.debugname) -end - -parse_pthread_priority = function(pri) - pri = pri & 0xffffffff - if (pri & 0x02000000) == 0x02000000 then - return "Manager" - end - local qos = (pri & 0x00ffff00) >> 8 - if qos == 0x20 then - return string.format("UI[%x]", pri); - elseif qos == 0x10 then - return string.format("IN[%x]", pri); - elseif qos == 0x08 then - return string.format("DF[%x]", pri); - elseif qos == 0x04 then - return string.format("UT[%x]", pri); - elseif qos == 0x02 then - return string.format("BG[%x]", pri); - elseif qos == 0x01 then - return string.format("MT[%x]", pri); - elseif qos == 0x00 then - return string.format("--[%x]", pri); - else - return string.format("??[%x]", pri); - end -end - -parse_qos_bucket = 
function(pri) - if pri == 0 then - return string.format("UI[%x]", pri); - elseif pri == 1 then - return string.format("IN[%x]", pri); - elseif pri == 2 then - return string.format("DF[%x]", pri); - elseif pri == 3 then - return string.format("UT[%x]", pri); - elseif pri == 4 then - return string.format("BG[%x]", pri); - elseif pri == 5 then - return string.format("MT[%x]", pri); - elseif pri == 6 then - return string.format("MG[%x]", pri); - else - return string.format("??[%x]", pri); - end -end - -parse_thactive_req_bucket = function(pri) - if pri ~= 6 then - return parse_qos_bucket(pri) - end - return "None" -end - -get_thactive = function(low, high) - return string.format("req: %s, MG: %d, UI: %d, IN: %d, DE: %d, UT: %d, BG: %d, MT: %d", - parse_thactive_req_bucket(high >> (16 * 3)), (high >> (2 * 16)) & 0xffff, - (low >> (0 * 16)) & 0xffff, (low >> (1 * 16)) & 0xffff, - (low >> (2 * 16)) & 0xffff, (low >> (3 * 16)) & 0xffff, - (high >> (0 * 16)) & 0xffff, (high >> (1 * 16)) & 0xffff) -end - --- workqueue lifecycle - -trace_codename("wq_pthread_exit", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - printf("%s\tprocess is exiting\n",prefix) - else - printf("%s\tworkqueue marked as exiting and timer is complete\n",prefix) - end -end) - -trace_codename("wq_workqueue_exit", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - printf("%s\tall threads have exited, cleaning up\n",prefix) - else - printf("%s\tclean up complete\n",prefix) - end -end) - -trace_codename("wq_start_add_timer", function(buf) - local prefix = get_prefix(buf) - printf("%s\tarming timer to fire in %d us (flags: %x, reqcount: %d)\n", - prefix, buf.arg4, buf.arg3, buf.arg2) -end) - -trace_codename("wq_add_timer", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - printf("%s\tadd_timer fired (flags: %x, nthreads: %d, thidlecount: %d)\n", - prefix, buf.arg2, buf.arg3, buf.arg4) - elseif trace.debugid_is_end(buf.debugid) then - printf("%s\tadd_timer completed (start_timer: %x, nthreads: %d, thidlecount: %d)\n", - prefix, buf.arg2, buf.arg3, buf.arg4) - else - printf("%s\tadd_timer added threads (reqcount: %d, thidlecount: %d, busycount: %d)\n", - prefix, buf.arg2, buf.arg3, buf.arg4) - - end -end) - -trace_codename("wq_run_threadreq", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - if buf[2] > 0 then - printf("%s\trun_threadreq: %x (priority: %s, flags: %d) on %x\n", - prefix, buf[2], parse_qos_bucket(buf[4] >> 16), buf[4] & 0xff, buf[3]) - else - printf("%s\trun_threadreq: on %x\n", - prefix, buf[3]) - end - else - if buf[2] == 1 then - printf("%s\tpended event manager, already running\n", prefix) - elseif buf[2] == 2 then - printf("%s\tnothing to do\n", prefix) - elseif buf[2] == 3 then - printf("%s\tno eligible request found\n", prefix) - elseif buf[2] == 4 then - printf("%s\tadmission control failed\n", prefix) - elseif buf[2] == 5 then - printf("%s\tunable to add new thread (may_add_new_thread: %d, nthreads: %d)\n", prefix, buf[3], buf[4]) - elseif buf[2] == 6 then - printf("%s\tthread creation failed\n", prefix) - elseif buf[2] == 0 then - printf("%s\tsuccess\n", prefix) - else - printf("%s\tWARNING: UNKNOWN END CODE:%d\n", prefix, buf.arg4) - end - end -end) - -trace_codename("wq_run_threadreq_mgr_merge", function(buf) - local prefix = get_prefix(buf) - printf("%s\t\tmerging incoming manager request into existing\n", prefix) -end) - 
-trace_codename("wq_run_threadreq_req_select", function(buf) - local prefix = get_prefix(buf) - if buf[3] == 1 then - printf("%s\t\tselected event manager request %x\n", prefix, buf[2]) - elseif buf[3] == 2 then - printf("%s\t\tselected overcommit request %x\n", prefix, buf[2]) - elseif buf[3] == 3 then - printf("%s\t\tselected constrained request %x\n", prefix, buf[2]) - else - printf("%s\t\tWARNING: UNKNOWN DECISION CODE:%d\n", prefix, buf.arg[3]) - end -end) - -trace_codename("wq_run_threadreq_thread_select", function(buf) - local prefix = get_prefix(buf) - if buf[2] == 1 then - printf("%s\t\trunning on current thread %x\n", prefix, buf[3]) - elseif buf[2] == 2 then - printf("%s\t\trunning on idle thread %x\n", prefix, buf[3]) - elseif buf[2] == 3 then - printf("%s\t\tcreated new thread\n", prefix) - else - printf("%s\t\tWARNING: UNKNOWN DECISION CODE:%d\n", prefix, buf.arg[2]) - end -end) - -trace_codename("wq_thread_reset_priority", function(buf) - local prefix = get_prefix(buf) - local old_qos = buf[3] >> 16; - local new_qos = buf[3] & 0xff; - if buf[4] == 1 then - printf("%s\t\treset priority of %x from %s to %s\n", prefix, buf[2], parse_qos_bucket(old_qos), parse_qos_bucket(new_qos)) - elseif buf[4] == 2 then - printf("%s\t\treset priority of %x from %s to %s for reserve manager\n", prefix, buf[2], parse_qos_bucket(old_qos), parse_qos_bucket(new_qos)) - elseif buf[4] == 3 then - printf("%s\t\treset priority of %x from %s to %s for cleanup\n", prefix, buf[2], parse_qos_bucket(old_qos), parse_qos_bucket(new_qos)) - end -end) - -trace_codename("wq_thread_park", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - printf("%s\tthread parking\n", prefix) - else - printf("%s\tthread woken\n", prefix) - end -end) - -trace_codename("wq_thread_squash", function(buf) - local prefix = get_prefix(buf) - printf("%s\tthread squashed from %s to %s\n", prefix, - parse_qos_bucket(buf[2]), parse_qos_bucket(buf[3])) -end) - -trace.enable_thread_cputime() -runitem_time_map = {} -runitem_cputime_map = {} -trace_codename("wq_runitem", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - runitem_time_map[buf.threadid] = buf.timestamp; - runitem_cputime_map[buf.threadid] = trace.cputime_for_thread(buf.threadid); - - printf("%s\tSTART running item @ %s\n", prefix, parse_qos_bucket(buf[3])) - elseif runitem_time_map[buf.threadid] then - local time = buf.timestamp - runitem_time_map[buf.threadid] - local cputime = trace.cputime_for_thread(buf.threadid) - runitem_cputime_map[buf.threadid] - - local time_ms = trace.convert_timestamp_to_nanoseconds(time) / 1000000 - local cputime_ms = trace.convert_timestamp_to_nanoseconds(cputime) / 1000000 - - printf("%s\tDONE running item @ %s: time = %6.6f ms, cputime = %6.6f ms\n", - prefix, parse_qos_bucket(buf[2]), time_ms, cputime_ms) - - runitem_time_map[buf.threadid] = 0 - runitem_cputime_map[buf.threadid] = 0 - else - printf("%s\tDONE running item @ %s\n", prefix, parse_qos_bucket(buf[2])) - end -end) - -trace_codename("wq_runthread", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - printf("%s\tSTART running thread\n", prefix) - elseif trace.debugid_is_end(buf.debugid) then - printf("%s\tDONE running thread\n", prefix) - end -end) - -trace_codename("wq_thactive_update", function(buf) - local prefix = get_prefix(buf) - local thactive = get_thactive(buf[2], buf[3]) - if buf[1] == 1 then - printf("%s\tthactive constrained pre-post (%s)\n", 
prefix, thactive) - elseif buf[1] == 2 then - printf("%s\tthactive constrained run (%s)\n", prefix, thactive) - else - return - end -end) - -trace_codename("wq_thread_block", function(buf) - local prefix = get_prefix(buf) - local req_pri = parse_thactive_req_bucket(buf[3] >> 8) - if trace.debugid_is_start(buf.debugid) then - printf("%s\tthread blocked (activecount: %d, priority: %s, req_pri: %s, reqcount: %d, start_timer: %d)\n", - prefix, buf[2], parse_qos_bucket(buf[3] & 0xff), req_pri, buf[4] >> 1, buf[4] & 0x1) - else - printf("%s\tthread unblocked (activecount: %d, priority: %s, req_pri: %s, threads_scheduled: %d)\n", - prefix, buf[2], parse_qos_bucket(buf[3] & 0xff), req_pri, buf[4]) - end -end) - -trace_codename("wq_thread_create_failed", function(buf) - local prefix = get_prefix(buf) - if buf[3] == 0 then - printf("%s\tfailed to create new workqueue thread, kern_return: 0x%x\n", - prefix, buf[2]) - elseif buf[3] == 1 then - printf("%s\tfailed to vm_map workq thread stack: 0x%x\n", prefix, buf[2]) - elseif buf[3] == 2 then - printf("%s\tfailed to vm_protect workq thread guardsize: 0x%x\n", prefix, buf[2]) - end -end) - -trace_codename("wq_thread_create", function(buf) - printf("%s\tcreated new workqueue thread\n", get_prefix(buf)) -end) - -trace_codename("wq_wqops_reqthreads", function(buf) - local prefix = get_prefix(buf) - printf("%s\tuserspace requested %d threads at %s\n", prefix, buf[2], parse_pthread_priority(buf[3])); -end) - -trace_codename("wq_kevent_reqthreads", function(buf) - local prefix = get_prefix(buf) - if buf[4] == 0 then - printf("%s\tkevent requested a thread at %s\n", prefix, parse_pthread_priority(buf[3])); - elseif buf[4] == 1 then - printf("%s\tworkloop requested a thread for req %x at %s\n", prefix, buf[2], parse_pthread_priority(buf[3])); - elseif buf[4] == 2 then - printf("%s\tworkloop updated priority of req %x to %s\n", prefix, buf[2], parse_pthread_priority(buf[3])); - elseif buf[4] == 3 then - printf("%s\tworkloop canceled req %x\n", prefix, buf[2], parse_pthread_priority(buf[3])); - elseif buf[4] == 4 then - printf("%s\tworkloop redrove a thread request\n", prefix); - end -end) - -trace_codename("wq_constrained_admission", function(buf) - local prefix = get_prefix(buf) - if buf[2] == 1 then - printf("fail: %s\twq_constrained_threads_scheduled=%d >= wq_max_constrained_threads=%d\n", - prefix, buf[3], buf[4]) - elseif (buf[2] == 2) or (buf[2] == 3) then - local success = nil; - if buf[2] == 2 then success = "success" - else success = "fail" end - printf("%s: %s\tthactive_count=%d + busycount=%d >= wq->wq_max_concurrency\n", - prefix, success, buf[3], buf[4]) - end -end) - --- The trace codes we need aren't enabled by default -darwin.sysctlbyname("kern.pthread_debug_tracing", 1) -completion_handler = function() - darwin.sysctlbyname("kern.pthread_debug_tracing", 0) -end -trace.set_completion_handler(completion_handler) diff --git a/xcodescripts/install-lldbmacros.sh b/xcodescripts/install-lldbmacros.sh index e50ee44..9501f96 100644 --- a/xcodescripts/install-lldbmacros.sh +++ b/xcodescripts/install-lldbmacros.sh @@ -2,4 +2,17 @@ # install the pthread lldbmacros into the module mkdir -p $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python || true -rsync -aq $SRCROOT/lldbmacros/* $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python/ +rsync -aq $SRCROOT/lldbmacros/* $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python + +for variant in $BUILD_VARIANTS; do + case $variant in + normal) + SUFFIX="" + ;; 
+ *) + SUFFIX="_$variant" + ;; + esac + + ln -sf init.py $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python/$EXECUTABLE_NAME$SUFFIX.py +done diff --git a/xcodescripts/kext.xcconfig b/xcodescripts/kext.xcconfig index fcd42ea..84e9079 100644 --- a/xcodescripts/kext.xcconfig +++ b/xcodescripts/kext.xcconfig @@ -41,7 +41,7 @@ LLVM_LTO_development = NO LLVM_LTO_kasan = NO LLVM_LTO = $(LLVM_LTO_$(PTHREAD_VARIANT)) -GCC_PREPROCESSOR_DEFINITIONS_kext = XNU_KERNEL_PRIVATE MACH_KERNEL_PRIVATE ABSOLUTETIME_SCALAR_TYPE NEEDS_SCHED_CALL_T +GCC_PREPROCESSOR_DEFINITIONS_kext = XNU_KERNEL_PRIVATE MACH_KERNEL_PRIVATE ABSOLUTETIME_SCALAR_TYPE NEEDS_SCHED_CALL_T __PTHREAD_EXPOSE_INTERNALS__ GCC_PREPROCESSOR_DEFINITIONS_kext_development = MACH_ASSERT DEBUG GCC_PREPROCESSOR_DEFINITIONS = $(GCC_PREPROCESSOR_DEFINITIONS_kext) $(GCC_PREPROCESSOR_DEFINITIONS_kext_$(PTHREAD_VARIANT)) diff --git a/xcodescripts/pthread.dirty b/xcodescripts/pthread.dirty new file mode 100644 index 0000000..2a8f66e --- /dev/null +++ b/xcodescripts/pthread.dirty @@ -0,0 +1,33 @@ +# cacheline-aligned + +# uint64_t sized +___pthread_stack_hint + +# pointer-sized +___libdispatch_keventfunction +___libdispatch_workerfunction +___libdispatch_workloopfunction +___pthread_head +__main_thread_ptr +__pthread_free +__pthread_keys +__pthread_malloc +__pthread_ptr_munge_token +_exitf + +# int-sized +___is_threaded +___libdispatch_offset +___pthread_supported_features +___pthread_tsd_lock +___pthread_tsd_max +___unix_conforming +__main_qos +__pthread_count +__pthread_list_lock + +# byte-sized +___workq_newapi +_default_priority +_max_priority +_min_priority diff --git a/xcodescripts/pthread.xcconfig b/xcodescripts/pthread.xcconfig index 7b2f244..1dedcaa 100644 --- a/xcodescripts/pthread.xcconfig +++ b/xcodescripts/pthread.xcconfig @@ -57,7 +57,7 @@ DISABLED_WARNING_CFLAGS = -Wno-int-conversion -Wno-missing-prototypes -Wno-sign- WARNING_CFLAGS = -Wall -Wextra -Warray-bounds-pointer-arithmetic -Wcomma -Wconditional-uninitialized -Wcovered-switch-default -Wdate-time -Wdeprecated -Wdouble-promotion -Wduplicate-enum -Wfloat-equal -Widiomatic-parentheses -Wignored-qualifiers -Wimplicit-fallthrough -Wmissing-noreturn -Wnullable-to-nonnull-conversion -Wover-aligned -Wpointer-arith -Wstatic-in-inline -Wtautological-compare -Wunguarded-availability -Wunused $(NO_WARNING_CFLAGS) $(DISABLED_WARNING_CFLAGS) NO_WARNING_CFLAGS = -Wno-pedantic -Wno-bad-function-cast -Wno-c++98-compat-pedantic -Wno-cast-align -Wno-cast-qual -Wno-disabled-macro-expansion -Wno-documentation-unknown-command -Wno-format-nonliteral -Wno-missing-variable-declarations -Wno-packed -Wno-padded -Wno-reserved-id-macro -Wno-switch-enum -Wno-undef -Wno-unreachable-code-aggressive -Wno-unused-macros -Wno-used-but-marked-unused -BASE_PREPROCESSOR_MACROS = __LIBC__ __DARWIN_UNIX03=1 __DARWIN_64_BIT_INO_T=1 __DARWIN_NON_CANCELABLE=1 __DARWIN_VERS_1050=1 _FORTIFY_SOURCE=0 __PTHREAD_BUILDING_PTHREAD__=1 $(SIM_PREPROCESSOR_MACROS) +BASE_PREPROCESSOR_MACROS = __LIBC__ __DARWIN_UNIX03=1 __DARWIN_64_BIT_INO_T=1 __DARWIN_NON_CANCELABLE=1 __DARWIN_VERS_1050=1 _FORTIFY_SOURCE=0 __PTHREAD_BUILDING_PTHREAD__=1 $(SIM_PREPROCESSOR_MACROS) __PTHREAD_EXPOSE_INTERNALS__ GCC_PREPROCESSOR_DEFINITIONS = $(BASE_PREPROCESSOR_MACROS) $(PLATFORM_PREPROCESSOR_DEFINITIONS) // TODO: Remove -fstack-protector on _debug when it is moved to libplatform @@ -68,8 +68,10 @@ OTHER_CFLAGS_debug = -fno-inline -O0 -DDEBUG=1 LINK_WITH_STANDARD_LIBRARIES = NO DYLIB_CURRENT_VERSION = $(RC_ProjectSourceVersion) 
DYLIB_COMPATIBILITY_VERSION = 1 +DIRTY_LDFLAGS = -Wl,-dirty_data_list,$(SRCROOT)/xcodescripts/pthread.dirty +DIRTY_LDFLAGS[sdk=macos*] = DYLIB_LDFLAGS = -Wl,-alias_list,$(SRCROOT)/xcodescripts/pthread.aliases -Wl,-umbrella,System -L/usr/lib/system -lsystem_kernel -lsystem_platform -ldyld -lcompiler_rt -OTHER_LDFLAGS = $(DYLIB_LDFLAGS) $(CR_LDFLAGS) $(PLATFORM_LDFLAGS) +OTHER_LDFLAGS = $(DYLIB_LDFLAGS) $(DIRTY_LDFLAGS) $(CR_LDFLAGS) $(PLATFORM_LDFLAGS) // Simulator build rules EXCLUDED_SOURCE_FILE_NAMES[sdk=iphonesimulator*] = *.c *.s diff --git a/xcodescripts/resolved.xcconfig b/xcodescripts/resolved.xcconfig index 2b33118..863252a 100644 --- a/xcodescripts/resolved.xcconfig +++ b/xcodescripts/resolved.xcconfig @@ -1,6 +1,6 @@ #include "pthread.xcconfig" -SUPPORTED_PLATFORMS = iphoneos appletvos watchos +SUPPORTED_PLATFORMS = iphoneos PRODUCT_NAME = pthread_$(RESOLVED_VARIANT) OTHER_LDFLAGS = SKIP_INSTALL = YES