git.saurik.com Git - apple/libpthread.git/commitdiff
libpthread-330.201.1.tar.gz macos-1014 v330.201.1
author Apple <opensource@apple.com>
Thu, 4 Oct 2018 22:01:40 +0000 (22:01 +0000)
committer Apple <opensource@apple.com>
Thu, 4 Oct 2018 22:01:40 +0000 (22:01 +0000)
52 files changed:
kern/kern_init.c
kern/kern_internal.h
kern/kern_policy.c [deleted file]
kern/kern_support.c
kern/kern_synch.c
kern/kern_trace.h
kern/synch_internal.h
kern/workqueue_internal.h
libpthread.xcodeproj/project.pbxproj
lldbmacros/init.py [new file with mode: 0644]
lldbmacros/pthread.py [deleted file]
man/pthread_mutexattr.3
private/dependency_private.h [new file with mode: 0644]
private/private.h
private/qos_private.h
private/tsd_private.h
private/workqueue_private.h
pthread/introspection.h
pthread/pthread.h
pthread/pthread_spis.h
pthread/stack_np.h [new file with mode: 0644]
src/internal.h
src/offsets.h [new file with mode: 0644]
src/pthread.c
src/pthread_asm.s
src/pthread_cancelable.c
src/pthread_cond.c
src/pthread_dependency.c [new file with mode: 0644]
src/pthread_mutex.c
src/pthread_rwlock.c
src/pthread_tsd.c
src/qos.c
src/thread_setup.c
tests/Makefile
tests/cond_prepost.c [new file with mode: 0644]
tests/main_stack_custom.c
tests/mutex.c
tests/mutex_prepost.c [new file with mode: 0644]
tests/perf_contended_mutex_rwlock.c [new file with mode: 0644]
tests/pthread_dependency.c [new file with mode: 0644]
tests/pthread_threadid_np.c
tests/rdar_32848402.c
tests/stack.c [new file with mode: 0644]
tests/stack_aslr.c
tests/stack_size.c [new file with mode: 0644]
tools/locktrace.lua
tools/wqtrace.lua [deleted file]
xcodescripts/install-lldbmacros.sh
xcodescripts/kext.xcconfig
xcodescripts/pthread.dirty [new file with mode: 0644]
xcodescripts/pthread.xcconfig
xcodescripts/resolved.xcconfig

diff --git a/kern/kern_init.c b/kern/kern_init.c
index 3de9b5d034ed71a41164c8f94de37d1534db8b10..3321483ffafae2a6e7bf650006bee3242a3358cc 100644 (file)
@@ -17,21 +17,12 @@ pthread_callbacks_t pthread_kern;
 
 const struct pthread_functions_s pthread_internal_functions = {
        .pthread_init = _pthread_init,
-       .fill_procworkqueue = (int(*)(proc_t, void*))_fill_procworkqueue,
-       .get_pwq_state_kdp = _get_pwq_state_kdp,
-       .workqueue_exit = _workqueue_exit,
-       .workqueue_mark_exiting = _workqueue_mark_exiting,
-       .workqueue_thread_yielded = _workqueue_thread_yielded,
-       .workqueue_get_sched_callback = _workqueue_get_sched_callback,
        .pth_proc_hashinit = _pth_proc_hashinit,
        .pth_proc_hashdelete = _pth_proc_hashdelete,
        .bsdthread_create = _bsdthread_create,
        .bsdthread_register = _bsdthread_register,
        .bsdthread_terminate = _bsdthread_terminate,
-       .bsdthread_ctl = _bsdthread_ctl,
        .thread_selfid = _thread_selfid,
-       .workq_kernreturn = _workq_kernreturn,
-       .workq_open = _workq_open,
 
        .psynch_mutexwait = _psynch_mutexwait,
        .psynch_mutexdrop = _psynch_mutexdrop,
@@ -48,12 +39,11 @@ const struct pthread_functions_s pthread_internal_functions = {
        .pthread_find_owner = _pthread_find_owner,
        .pthread_get_thread_kwq = _pthread_get_thread_kwq,
 
-       .workq_reqthreads = _workq_reqthreads,
-       .thread_qos_from_pthread_priority = _thread_qos_from_pthread_priority,
-       .pthread_priority_canonicalize2 = _pthread_priority_canonicalize,
-       .workq_thread_has_been_unbound = _workq_thread_has_been_unbound,
-       .workq_threadreq = workq_kern_threadreq,
-       .workq_threadreq_modify = workq_kern_threadreq_modify,
+       .workq_create_threadstack = workq_create_threadstack,
+       .workq_destroy_threadstack = workq_destroy_threadstack,
+       .workq_setup_thread = workq_setup_thread,
+       .workq_handle_stack_events = workq_handle_stack_events,
+       .workq_markfree_threadstack = workq_markfree_threadstack,
 };
 
 kern_return_t pthread_start(__unused kmod_info_t * ki, __unused void *d)
diff --git a/kern/kern_internal.h b/kern/kern_internal.h
index fa2c27b926f3c89f65312bbe8a6123db96c69380..bb29cdc6cb0af7f59d4ba245d4816fc83d524bfc 100644 (file)
 #ifndef _SYS_PTHREAD_INTERNAL_H_
 #define _SYS_PTHREAD_INTERNAL_H_
 
+#include <pthread/bsdthread_private.h>
+#include <pthread/priority_private.h>
+#include <pthread/workqueue_syscalls.h>
+
 #ifdef KERNEL
+struct ksyn_waitq_element;
 #include <stdatomic.h>
 #include <kern/thread_call.h>
 #include <kern/kcdata.h>
 #define PTHREAD_FEATURE_WORKLOOP          0x80         /* supports workloops */
 #define PTHREAD_FEATURE_QOS_DEFAULT            0x40000000      /* the kernel supports QOS_CLASS_DEFAULT */
 
-/* pthread bsdthread_ctl sysctl commands */
-#define BSDTHREAD_CTL_SET_QOS                          0x10    /* bsdthread_ctl(BSDTHREAD_CTL_SET_QOS, thread_port, tsd_entry_addr, 0) */
-#define BSDTHREAD_CTL_GET_QOS                          0x20    /* bsdthread_ctl(BSDTHREAD_CTL_GET_QOS, thread_port, 0, 0) */
-#define BSDTHREAD_CTL_QOS_OVERRIDE_START       0x40    /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_START, thread_port, priority, 0) */
-#define BSDTHREAD_CTL_QOS_OVERRIDE_END         0x80    /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_END, thread_port, 0, 0) */
-#define BSDTHREAD_CTL_SET_SELF                         0x100   /* bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, priority, voucher, flags) */
-#define BSDTHREAD_CTL_QOS_OVERRIDE_RESET       0x200   /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_RESET, 0, 0, 0) */
-#define BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH    0x400   /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH, thread_port, priority, 0) */
-#define BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD                           0x401   /* bsdthread_ctl(BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD, thread_port, priority, resource) */
-#define BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET                         0x402   /* bsdthread_ctl(BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET, 0|1 (?reset_all), resource, 0) */
-#define BSDTHREAD_CTL_QOS_MAX_PARALLELISM      0x800   /* bsdthread_ctl(BSDTHREAD_CTL_QOS_MAX_PARALLELISM, priority, flags, 0) */
-
-/* qos_class_t is mapped into one of these bits in the bitfield, this mapping now exists here because
- * libdispatch requires the QoS class mask of the pthread_priority_t to be a bitfield.
- */
-#define __PTHREAD_PRIORITY_CBIT_USER_INTERACTIVE 0x20
-#define __PTHREAD_PRIORITY_CBIT_USER_INITIATED 0x10
-#define __PTHREAD_PRIORITY_CBIT_DEFAULT 0x8
-#define __PTHREAD_PRIORITY_CBIT_UTILITY 0x4
-#define __PTHREAD_PRIORITY_CBIT_BACKGROUND 0x2
-#define __PTHREAD_PRIORITY_CBIT_MAINTENANCE 0x1
-#define __PTHREAD_PRIORITY_CBIT_UNSPECIFIED 0x0
-
-static inline int
-_pthread_qos_class_to_thread_qos(qos_class_t qos)
-{
-       switch (qos) {
-       case QOS_CLASS_USER_INTERACTIVE: return THREAD_QOS_USER_INTERACTIVE;
-       case QOS_CLASS_USER_INITIATED: return THREAD_QOS_USER_INITIATED;
-       case QOS_CLASS_DEFAULT: return THREAD_QOS_LEGACY;
-       case QOS_CLASS_UTILITY: return THREAD_QOS_UTILITY;
-       case QOS_CLASS_BACKGROUND: return THREAD_QOS_BACKGROUND;
-       case QOS_CLASS_MAINTENANCE: return THREAD_QOS_MAINTENANCE;
-       default: return THREAD_QOS_UNSPECIFIED;
-       }
-}
-
-static inline pthread_priority_t
-_pthread_priority_make_newest(qos_class_t qc, int rel, unsigned long flags)
-{
-       pthread_priority_t cls;
-       switch (qc) {
-               case QOS_CLASS_USER_INTERACTIVE: cls = __PTHREAD_PRIORITY_CBIT_USER_INTERACTIVE; break;
-               case QOS_CLASS_USER_INITIATED: cls = __PTHREAD_PRIORITY_CBIT_USER_INITIATED; break;
-               case QOS_CLASS_DEFAULT: cls = __PTHREAD_PRIORITY_CBIT_DEFAULT; break;
-               case QOS_CLASS_UTILITY: cls = __PTHREAD_PRIORITY_CBIT_UTILITY; break;
-               case QOS_CLASS_BACKGROUND: cls = __PTHREAD_PRIORITY_CBIT_BACKGROUND; break;
-               case QOS_CLASS_MAINTENANCE: cls = __PTHREAD_PRIORITY_CBIT_MAINTENANCE; break;
-               case QOS_CLASS_UNSPECIFIED:
-               default:
-                       cls = __PTHREAD_PRIORITY_CBIT_UNSPECIFIED;
-                       rel = 1; // results in priority bits == 0 <rdar://problem/16184900>
-                       break;
-       }
-
-       pthread_priority_t p =
-               (flags & _PTHREAD_PRIORITY_FLAGS_MASK) |
-               ((cls << _PTHREAD_PRIORITY_QOS_CLASS_SHIFT) & _PTHREAD_PRIORITY_QOS_CLASS_MASK) |
-               (((uint8_t)rel - 1) & _PTHREAD_PRIORITY_PRIORITY_MASK);
-
-       return p;
-}
-
-static inline qos_class_t
-_pthread_priority_get_qos_newest(pthread_priority_t priority)
-{
-       qos_class_t qc;
-       switch ((priority & _PTHREAD_PRIORITY_QOS_CLASS_MASK) >> _PTHREAD_PRIORITY_QOS_CLASS_SHIFT) {
-               case __PTHREAD_PRIORITY_CBIT_USER_INTERACTIVE: qc = QOS_CLASS_USER_INTERACTIVE; break;
-               case __PTHREAD_PRIORITY_CBIT_USER_INITIATED: qc = QOS_CLASS_USER_INITIATED; break;
-               case __PTHREAD_PRIORITY_CBIT_DEFAULT: qc = QOS_CLASS_DEFAULT; break;
-               case __PTHREAD_PRIORITY_CBIT_UTILITY: qc = QOS_CLASS_UTILITY; break;
-               case __PTHREAD_PRIORITY_CBIT_BACKGROUND: qc = QOS_CLASS_BACKGROUND; break;
-               case __PTHREAD_PRIORITY_CBIT_MAINTENANCE: qc = QOS_CLASS_MAINTENANCE; break;
-               case __PTHREAD_PRIORITY_CBIT_UNSPECIFIED:
-               default: qc = QOS_CLASS_UNSPECIFIED; break;
-       }
-       return qc;
-}
-
-#define _pthread_priority_get_relpri(priority) \
-       ((int8_t)((priority & _PTHREAD_PRIORITY_PRIORITY_MASK) >> _PTHREAD_PRIORITY_PRIORITY_SHIFT) + 1)
-
-#define _pthread_priority_get_flags(priority) \
-       (priority & _PTHREAD_PRIORITY_FLAGS_MASK)
-
-#define _pthread_priority_split_newest(priority, qos, relpri) \
-       ({ qos = _pthread_priority_get_qos_newest(priority); \
-          relpri = (qos == QOS_CLASS_UNSPECIFIED) ? 0 : \
-                  _pthread_priority_get_relpri(priority); \
-       })
-
-#define _PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL 0x1
-#define _PTHREAD_QOS_PARALLELISM_REALTIME 0x2
-
 /* userspace <-> kernel registration struct, for passing data to/from the kext during main thread init. */
 struct _pthread_registration_data {
        /*
@@ -177,9 +87,16 @@ struct _pthread_registration_data {
        uint32_t tsd_offset; /* copy-in */
        uint32_t return_to_kernel_offset; /* copy-in */
        uint32_t mach_thread_self_offset; /* copy-in */
+       mach_vm_address_t stack_addr_hint; /* copy-out */
        uint32_t mutex_default_policy; /* copy-out */
 } __attribute__ ((packed));
 
+/*
+ * "error" flags returned by fail condvar syscalls
+ */
+#define ECVCLEARED     0x100
+#define ECVPREPOST     0x200
+
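A minimal user-space sketch of how these annotation bits can be separated from the underlying error value (assumed here to ride on the psynch condvar syscall's return value, as the comment's wording suggests):

#include <stdint.h>

#define ECVCLEARED 0x100 /* per the comment above: condvar was cleared */
#define ECVPREPOST 0x200 /* wait was satisfied by a prepost */

/* Hypothetical decoder: mask the ECV* annotation bits off a condvar
 * syscall return, leaving the plain error value. */
static inline int
cv_strip_flags(uint32_t ret, uint32_t *ecv_flags)
{
        *ecv_flags = ret & (ECVCLEARED | ECVPREPOST);
        return (int)(ret & ~(uint32_t)(ECVCLEARED | ECVPREPOST));
}
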
 #ifdef KERNEL
 
 /* The set of features, from the feature bits above, that we support. */
@@ -198,23 +115,16 @@ extern pthread_callbacks_t pthread_kern;
 struct ksyn_waitq_element {
        TAILQ_ENTRY(ksyn_waitq_element) kwe_list;       /* link to other list members */
        void *          kwe_kwqqueue;                   /* queue blocked on */
-       uint32_t        kwe_state;                      /* state */
+       thread_t        kwe_thread;
+       uint16_t        kwe_state;                      /* state */
+       uint16_t        kwe_flags;
        uint32_t        kwe_lockseq;                    /* the sequence of the entry */
        uint32_t        kwe_count;                      /* upper bound on number of matches still pending */
        uint32_t        kwe_psynchretval;               /* thread retval */
        void            *kwe_uth;                       /* uthread */
-       uint64_t        kwe_tid;                        /* tid of waiter */
 };
 typedef struct ksyn_waitq_element * ksyn_waitq_element_t;
 
-pthread_priority_t thread_qos_get_pthread_priority(int qos) __attribute__((const));
-int thread_qos_get_class_index(int qos) __attribute__((const));
-int pthread_priority_get_thread_qos(pthread_priority_t priority) __attribute__((const));
-int pthread_priority_get_class_index(pthread_priority_t priority) __attribute__((const));
-pthread_priority_t class_index_get_pthread_priority(int index) __attribute__((const));
-int class_index_get_thread_qos(int index) __attribute__((const));
-int qos_class_get_class_index(int qos) __attribute__((const));
-
 #define PTH_DEFAULT_STACKSIZE 512*1024
 #define MAX_PTHREAD_SIZE 64*1024
 
@@ -276,29 +186,24 @@ extern thread_call_t psynch_thcall;
 
 struct uthread* current_uthread(void);
 
-#define WORKQ_REQTHREADS_THREADREQ   0x1
-#define WORKQ_REQTHREADS_NOEMERGENCY 0x2
-
-// Call for the kernel's kevent system to request threads.  A list of QoS/event
-// counts should be provided, sorted by flags and then QoS class.  If the
-// identity of the thread to handle the request is known, it will be returned.
-// If a new thread must be created, NULL will be returned.
-thread_t _workq_reqthreads(struct proc *p, int requests_count,
-                                                  workq_reqthreads_req_t requests);
+int
+workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr);
 
-// Resolve a pthread_priority_t to a QoS/relative pri
-integer_t _thread_qos_from_pthread_priority(unsigned long pri, unsigned long *flags);
-// Clear out extraneous flags/pri info for putting in voucher
-pthread_priority_t _pthread_priority_canonicalize(pthread_priority_t pri, boolean_t for_propagation);
+int
+workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr);
 
-boolean_t _workq_thread_has_been_unbound(thread_t th, int qos_class);
+void
+workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
+               mach_port_name_t kport, int th_qos, int setup_flags, int upcall_flags);
 
-int workq_kern_threadreq(struct proc *p, workq_threadreq_t req,
-               enum workq_threadreq_type, unsigned long priority, int flags);
+int
+workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
+               user_addr_t stackaddr, mach_port_name_t kport,
+               user_addr_t events, int nevents, int upcall_flags);
 
-int workq_kern_threadreq_modify(struct proc *p, workq_threadreq_t req,
-               enum workq_threadreq_op operation,
-               unsigned long arg1, unsigned long arg2);
+void
+workq_markfree_threadstack(proc_t p, thread_t th, vm_map_t vmap,
+               user_addr_t stackaddr);
 
 #endif // KERNEL
 
diff --git a/kern/kern_policy.c b/kern/kern_policy.c
deleted file mode 100644 (file)
index 98e0c61..0000000
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2013 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include "kern_internal.h"
-#include <kern/debug.h>
-#include <kern/assert.h>
-
-pthread_priority_t
-thread_qos_get_pthread_priority(int qos)
-{
-       /* Map the buckets we have in pthread_priority_t into a QoS tier. */
-       switch (qos) {
-               case THREAD_QOS_USER_INTERACTIVE: return _pthread_priority_make_newest(QOS_CLASS_USER_INTERACTIVE, 0, 0);
-               case THREAD_QOS_USER_INITIATED: return _pthread_priority_make_newest(QOS_CLASS_USER_INITIATED, 0, 0);
-               case THREAD_QOS_LEGACY: return _pthread_priority_make_newest(QOS_CLASS_DEFAULT, 0, 0);
-               case THREAD_QOS_UTILITY: return _pthread_priority_make_newest(QOS_CLASS_UTILITY, 0, 0);
-               case THREAD_QOS_BACKGROUND: return _pthread_priority_make_newest(QOS_CLASS_BACKGROUND, 0, 0);
-               case THREAD_QOS_MAINTENANCE: return _pthread_priority_make_newest(QOS_CLASS_MAINTENANCE, 0, 0);
-               default: return _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
-       }
-}
-
-int
-thread_qos_get_class_index(int qos)
-{
-    switch (qos) {
-               case THREAD_QOS_USER_INTERACTIVE: return 0;
-               case THREAD_QOS_USER_INITIATED: return 1;
-               case THREAD_QOS_LEGACY: return 2;
-               case THREAD_QOS_UTILITY: return 3;
-               case THREAD_QOS_BACKGROUND: return 4;
-               case THREAD_QOS_MAINTENANCE: return 5;
-               default: return 2;
-    }
-}
-
-int
-pthread_priority_get_thread_qos(pthread_priority_t priority)
-{
-       /* Map the buckets we have in pthread_priority_t into a QoS tier. */
-       switch (_pthread_priority_get_qos_newest(priority)) {
-               case QOS_CLASS_USER_INTERACTIVE: return THREAD_QOS_USER_INTERACTIVE;
-               case QOS_CLASS_USER_INITIATED: return THREAD_QOS_USER_INITIATED;
-               case QOS_CLASS_DEFAULT: return THREAD_QOS_LEGACY;
-               case QOS_CLASS_UTILITY: return THREAD_QOS_UTILITY;
-               case QOS_CLASS_BACKGROUND: return THREAD_QOS_BACKGROUND;
-               case QOS_CLASS_MAINTENANCE: return THREAD_QOS_MAINTENANCE;
-               default: return THREAD_QOS_UNSPECIFIED;
-       }
-}
-
-int
-pthread_priority_get_class_index(pthread_priority_t priority)
-{
-       return qos_class_get_class_index(_pthread_priority_get_qos_newest(priority));
-}
-
-pthread_priority_t
-class_index_get_pthread_priority(int index)
-{
-       qos_class_t qos;
-       switch (index) {
-               case 0: qos = QOS_CLASS_USER_INTERACTIVE; break;
-               case 1: qos = QOS_CLASS_USER_INITIATED; break;
-               case 2: qos = QOS_CLASS_DEFAULT; break;
-               case 3: qos = QOS_CLASS_UTILITY; break;
-               case 4: qos = QOS_CLASS_BACKGROUND; break;
-               case 5: qos = QOS_CLASS_MAINTENANCE; break;
-               case 6: assert(index != 6); // EVENT_MANAGER should be handled specially
-               default:
-                       /* Return the utility band if we don't understand the input. */
-                       qos = QOS_CLASS_UTILITY;
-       }
-
-       pthread_priority_t priority;
-       priority = _pthread_priority_make_newest(qos, 0, 0);
-
-       return priority;
-}
-
-int
-class_index_get_thread_qos(int class)
-{
-       int thread_qos;
-       switch (class) {
-               case 0: thread_qos = THREAD_QOS_USER_INTERACTIVE; break;
-               case 1: thread_qos = THREAD_QOS_USER_INITIATED; break;
-               case 2: thread_qos = THREAD_QOS_LEGACY; break;
-               case 3: thread_qos = THREAD_QOS_UTILITY; break;
-               case 4: thread_qos = THREAD_QOS_BACKGROUND; break;
-               case 5: thread_qos = THREAD_QOS_MAINTENANCE; break;
-               case 6: thread_qos = THREAD_QOS_LAST; break;
-               default:
-                       thread_qos = THREAD_QOS_LAST;
-       }
-       return thread_qos;
-}
-
-int
-qos_class_get_class_index(int qos)
-{
-       switch (qos){
-               case QOS_CLASS_USER_INTERACTIVE: return 0;
-               case QOS_CLASS_USER_INITIATED: return 1;
-               case QOS_CLASS_DEFAULT: return 2;
-               case QOS_CLASS_UTILITY: return 3;
-               case QOS_CLASS_BACKGROUND: return 4;
-               case QOS_CLASS_MAINTENANCE: return 5;
-               default:
-                       /* Return the default band if we don't understand the input. */
-                       return 2;
-       }
-}
-
-/**
- * Shims to help the kernel understand pthread_priority_t
- */
-
-integer_t
-_thread_qos_from_pthread_priority(unsigned long priority, unsigned long *flags)
-{
-    if (flags != NULL){
-        *flags = (int)_pthread_priority_get_flags(priority);
-    }
-    int thread_qos = pthread_priority_get_thread_qos(priority);
-    if (thread_qos == THREAD_QOS_UNSPECIFIED && flags != NULL){
-        *flags |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-    }
-    return thread_qos;
-}
-
-pthread_priority_t
-_pthread_priority_canonicalize(pthread_priority_t priority, boolean_t for_propagation)
-{
-       qos_class_t qos_class;
-       int relpri;
-       unsigned long flags = _pthread_priority_get_flags(priority);
-       _pthread_priority_split_newest(priority, qos_class, relpri);
-
-       if (for_propagation) {
-               flags = 0;
-               if (relpri > 0 || relpri < -15) relpri = 0;
-       } else {
-               if (qos_class == QOS_CLASS_UNSPECIFIED) {
-                       flags = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-               } else if (flags & (_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG|_PTHREAD_PRIORITY_SCHED_PRI_FLAG)){
-                       flags = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-                       qos_class = QOS_CLASS_UNSPECIFIED;
-               } else {
-                       flags &= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG|_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-               }
-
-               relpri = 0;
-       }
-
-       return _pthread_priority_make_newest(qos_class, relpri, flags);
-}
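
Every shim in the deleted file above encodes the same six-entry table relating QoS classes, thread QoS tiers, and workqueue class indices; condensed for reference (values copied from the removed functions, assuming the usual QoS constants from <sys/qos.h> and the kernel's THREAD_QOS_* enum are in scope):

/* index = workqueue class index; unknown inputs fall back to index 2
 * (default/legacy), and index 6 (event manager) is handled specially. */
static const struct {
        int qos_class;  /* QOS_CLASS_*  */
        int thread_qos; /* THREAD_QOS_* */
} qos_map[6] = {
        { QOS_CLASS_USER_INTERACTIVE, THREAD_QOS_USER_INTERACTIVE },
        { QOS_CLASS_USER_INITIATED,   THREAD_QOS_USER_INITIATED   },
        { QOS_CLASS_DEFAULT,          THREAD_QOS_LEGACY           },
        { QOS_CLASS_UTILITY,          THREAD_QOS_UTILITY          },
        { QOS_CLASS_BACKGROUND,       THREAD_QOS_BACKGROUND       },
        { QOS_CLASS_MAINTENANCE,      THREAD_QOS_MAINTENANCE      },
};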
diff --git a/kern/kern_support.c b/kern/kern_support.c
index 280a18b7565eb8c47b0b1b5e47d94a568298c820..e424cceac63775df0fe150a14de1d316cd3aa847 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -32,8 +32,8 @@
 
 #pragma mark - Front Matter
 
-#define  _PTHREAD_CONDATTR_T
-#define  _PTHREAD_COND_T
+#define _PTHREAD_CONDATTR_T
+#define _PTHREAD_COND_T
 #define _PTHREAD_MUTEXATTR_T
 #define _PTHREAD_MUTEX_T
 #define _PTHREAD_RWLOCKATTR_T
@@ -105,11 +105,11 @@ extern void panic(const char *string, ...) __printflike(1,2) __dead2;
 #include <libkern/OSAtomic.h>
 #include <libkern/libkern.h>
 
-#include <sys/pthread_shims.h>
 #include "kern_internal.h"
 
-// XXX: Dirty import for sys/signarvar.h that's wrapped in BSD_KERNEL_PRIVATE
-#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))
+#ifndef WQ_SETUP_EXIT_THREAD
+#define WQ_SETUP_EXIT_THREAD    8
+#endif
 
 // XXX: Ditto for thread tags from kern/thread.h
 #define        THREAD_TAG_MAINTHREAD 0x1
@@ -120,53 +120,13 @@ lck_grp_attr_t   *pthread_lck_grp_attr;
 lck_grp_t    *pthread_lck_grp;
 lck_attr_t   *pthread_lck_attr;
 
-zone_t pthread_zone_workqueue;
-zone_t pthread_zone_threadlist;
-zone_t pthread_zone_threadreq;
-
-extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
-extern void workqueue_thread_yielded(void);
-
-#define WQ_SETUP_FIRST_USE  1
-#define WQ_SETUP_CLEAR_VOUCHER  2
-static void _setup_wqthread(proc_t p, thread_t th, struct workqueue *wq,
-               struct threadlist *tl, int flags);
-
-static void reset_priority(struct threadlist *tl, pthread_priority_t pri);
-static pthread_priority_t pthread_priority_from_wq_class_index(struct workqueue *wq, int index);
-
-static void wq_unpark_continue(void* ptr, wait_result_t wait_result) __dead2;
-
-static bool workqueue_addnewthread(proc_t p, struct workqueue *wq);
-static void workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use);
-static void workqueue_lock_spin(struct workqueue *);
-static void workqueue_unlock(struct workqueue *);
-
-#define WQ_RUN_TR_THROTTLED 0
-#define WQ_RUN_TR_THREAD_NEEDED 1
-#define WQ_RUN_TR_THREAD_STARTED 2
-#define WQ_RUN_TR_EXITING 3
-static int workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq,
-               struct threadlist *tl, struct threadreq *req, bool may_add_new_thread);
-
-static bool may_start_constrained_thread(struct workqueue *wq,
-               uint32_t at_priclass, struct threadlist *tl, bool may_start_timer);
-
-static mach_vm_offset_t stack_addr_hint(proc_t p, vm_map_t vmap);
-static boolean_t wq_thread_is_busy(uint64_t cur_ts,
-               _Atomic uint64_t *lastblocked_tsp);
-
-int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
-int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
-
-#define WQ_MAXPRI_MIN  0       /* low prio queue num */
-#define WQ_MAXPRI_MAX  2       /* max  prio queuenum */
-#define WQ_PRI_NUM     3       /* number of prio work queues */
-
 #define C_32_STK_ALIGN          16
 #define C_64_STK_ALIGN          16
 #define C_64_REDZONE_LEN        128
 
+// WORKQ use the largest alignment any platform needs
+#define C_WORKQ_STK_ALIGN       16
+
 #define PTHREAD_T_OFFSET 0
 
 /*
@@ -177,11 +137,12 @@ _________________________________________
 -----------------------------------------
 */
 
-#define PTHREAD_START_CUSTOM           0x01000000
+#define PTHREAD_START_CUSTOM           0x01000000 // <rdar://problem/34501401>
 #define PTHREAD_START_SETSCHED         0x02000000
-#define PTHREAD_START_DETACHED         0x04000000
+// was PTHREAD_START_DETACHED          0x04000000
 #define PTHREAD_START_QOSCLASS         0x08000000
 #define PTHREAD_START_TSD_BASE_SET     0x10000000
+#define PTHREAD_START_SUSPENDED                0x20000000
 #define PTHREAD_START_QOSCLASS_MASK    0x00ffffff
 #define PTHREAD_START_POLICY_BITSHIFT 16
 #define PTHREAD_START_POLICY_MASK 0xff
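
These masks show how bsdthread_create multiplexes its flags word: the low 24 bits carry either a packed QoS class or a policy/importance pair. A sketch of the unpacking for the PTHREAD_START_SETSCHED case (the 0xffff importance mask is an assumption; it is not visible in this hunk):

#include <stdint.h>

#define PTHREAD_START_POLICY_BITSHIFT 16
#define PTHREAD_START_POLICY_MASK     0xff
#define PTHREAD_START_IMPORTANCE_MASK 0xffff /* assumed */

/* Unpack the sched policy and importance that PTHREAD_START_SETSCHED
 * callers fold into the flags argument. */
static inline void
start_flags_unpack_sched(uint32_t flags,
                unsigned int *policy, unsigned int *importance)
{
        /* policy lives above bit 16, importance in the low half */
        *policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
        *importance = flags & PTHREAD_START_IMPORTANCE_MASK;
}
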
@@ -193,199 +154,13 @@ _________________________________________
 
 #define BASEPRI_DEFAULT 31
 
-#pragma mark sysctls
-
-static uint32_t wq_stalled_window_usecs        = WQ_STALLED_WINDOW_USECS;
-static uint32_t wq_reduce_pool_window_usecs    = WQ_REDUCE_POOL_WINDOW_USECS;
-static uint32_t wq_max_timer_interval_usecs    = WQ_MAX_TIMER_INTERVAL_USECS;
-static uint32_t wq_max_threads                 = WORKQUEUE_MAXTHREADS;
-static uint32_t wq_max_constrained_threads     = WORKQUEUE_MAXTHREADS / 8;
-static uint32_t wq_max_concurrency[WORKQUEUE_NUM_BUCKETS + 1]; // set to ncpus on load
-
-SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
-          &wq_stalled_window_usecs, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
-          &wq_reduce_pool_window_usecs, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
-          &wq_max_timer_interval_usecs, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
-          &wq_max_threads, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
-          &wq_max_constrained_threads, 0, "");
-
-#ifdef DEBUG
-static int wq_kevent_test SYSCTL_HANDLER_ARGS;
-SYSCTL_PROC(_debug, OID_AUTO, wq_kevent_test, CTLFLAG_MASKED | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLTYPE_OPAQUE, NULL, 0, wq_kevent_test, 0, "-");
-#endif
-
-static uint32_t wq_init_constrained_limit = 1;
-
 uint32_t pthread_debug_tracing = 1;
 
-SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED,
-                  &pthread_debug_tracing, 0, "")
-
 static uint32_t pthread_mutex_default_policy;
 
 SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED,
           &pthread_mutex_default_policy, 0, "");
 
-/*
- *       +-----+-----+-----+-----+-----+-----+-----+
- *       | MT  | BG  | UT  | DE  | IN  | UN  | mgr |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * | pri |  5  |  4  |  3  |  2  |  1  |  0  |  6  |
- * | qos |  1  |  2  |  3  |  4  |  5  |  6  |  7  |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- */
-static inline uint32_t
-_wq_bucket_to_thread_qos(int pri)
-{
-       if (pri == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-               return WORKQUEUE_EVENT_MANAGER_BUCKET + 1;
-       }
-       return WORKQUEUE_EVENT_MANAGER_BUCKET - pri;
-}
-
-#pragma mark wq_thactive
-
-#if defined(__LP64__)
-// Layout is:
-//   7 * 16 bits for each QoS bucket request count (including manager)
-//   3 bits of best QoS among all pending constrained requests
-//   13 bits of zeroes
-#define WQ_THACTIVE_BUCKET_WIDTH 16
-#define WQ_THACTIVE_QOS_SHIFT    (7 * WQ_THACTIVE_BUCKET_WIDTH)
-#else
-// Layout is:
-//   6 * 10 bits for each QoS bucket request count (except manager)
-//   1 bit for the manager bucket
-//   3 bits of best QoS among all pending constrained requests
-#define WQ_THACTIVE_BUCKET_WIDTH 10
-#define WQ_THACTIVE_QOS_SHIFT    (6 * WQ_THACTIVE_BUCKET_WIDTH + 1)
-#endif
-#define WQ_THACTIVE_BUCKET_MASK  ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1)
-#define WQ_THACTIVE_BUCKET_HALF  (1U << (WQ_THACTIVE_BUCKET_WIDTH - 1))
-#define WQ_THACTIVE_NO_PENDING_REQUEST 6
-
-_Static_assert(sizeof(wq_thactive_t) * CHAR_BIT - WQ_THACTIVE_QOS_SHIFT >= 3,
-               "Make sure we have space to encode a QoS");
-
-static inline wq_thactive_t
-_wq_thactive_fetch_and_add(struct workqueue *wq, wq_thactive_t offset)
-{
-#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__)
-       return atomic_fetch_add_explicit(&wq->wq_thactive, offset,
-                       memory_order_relaxed);
-#else
-       return pthread_kern->atomic_fetch_add_128_relaxed(&wq->wq_thactive, offset);
-#endif
-}
-
-static inline wq_thactive_t
-_wq_thactive(struct workqueue *wq)
-{
-#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__)
-       return atomic_load_explicit(&wq->wq_thactive, memory_order_relaxed);
-#else
-       return pthread_kern->atomic_load_128_relaxed(&wq->wq_thactive);
-#endif
-}
-
-#define WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(tha) \
-               ((tha) >> WQ_THACTIVE_QOS_SHIFT)
-
-static inline uint32_t
-_wq_thactive_best_constrained_req_qos(struct workqueue *wq)
-{
-       // Avoid expensive atomic operations: the three bits we're loading are in
-       // a single byte, and always updated under the workqueue lock
-       wq_thactive_t v = *(wq_thactive_t *)&wq->wq_thactive;
-       return WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(v);
-}
-
-static inline wq_thactive_t
-_wq_thactive_set_best_constrained_req_qos(struct workqueue *wq,
-               uint32_t orig_qos, uint32_t new_qos)
-{
-       wq_thactive_t v;
-       v = (wq_thactive_t)(new_qos - orig_qos) << WQ_THACTIVE_QOS_SHIFT;
-       /*
-        * We can do an atomic add relative to the initial load because updates
-        * to this qos are always serialized under the workqueue lock.
-        */
-       return _wq_thactive_fetch_and_add(wq, v) + v;
-}
-
-static inline wq_thactive_t
-_wq_thactive_offset_for_qos(int qos)
-{
-       return (wq_thactive_t)1 << (qos * WQ_THACTIVE_BUCKET_WIDTH);
-}
-
-static inline wq_thactive_t
-_wq_thactive_inc(struct workqueue *wq, int qos)
-{
-       return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(qos));
-}
-
-static inline wq_thactive_t
-_wq_thactive_dec(struct workqueue *wq, int qos)
-{
-       return _wq_thactive_fetch_and_add(wq, -_wq_thactive_offset_for_qos(qos));
-}
-
-static inline wq_thactive_t
-_wq_thactive_move(struct workqueue *wq, int oldqos, int newqos)
-{
-       return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(newqos) -
-                       _wq_thactive_offset_for_qos(oldqos));
-}
-
-static inline uint32_t
-_wq_thactive_aggregate_downto_qos(struct workqueue *wq, wq_thactive_t v,
-               int qos, uint32_t *busycount, uint32_t *max_busycount)
-{
-       uint32_t count = 0, active;
-       uint64_t curtime;
-
-#ifndef __LP64__
-       /*
-        * on 32bits the manager bucket is a single bit and the best constrained
-        * request QoS 3 bits are where the 10 bits of a regular QoS bucket count
-        * would be. Mask them out.
-        */
-       v &= ~(~0ull << WQ_THACTIVE_QOS_SHIFT);
-#endif
-       if (busycount) {
-               curtime = mach_absolute_time();
-               *busycount = 0;
-       }
-       if (max_busycount) {
-               *max_busycount = qos + 1;
-       }
-       for (int i = 0; i <= qos; i++, v >>= WQ_THACTIVE_BUCKET_WIDTH) {
-               active = v & WQ_THACTIVE_BUCKET_MASK;
-               count += active;
-               if (busycount && wq->wq_thscheduled_count[i] > active) {
-                       if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i])) {
-                               /*
-                                * We only consider the last blocked thread for a given bucket
-                                * as busy because we don't want to take the list lock in each
-                                * sched callback. However this is an approximation that could
-                                * contribute to thread creation storms.
-                                */
-                               (*busycount)++;
-                       }
-               }
-       }
-       return count;
-}
-
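
The wq_thactive helpers deleted above pack one active-thread counter per QoS bucket into a single wide integer so increments, decrements, and bucket moves become a single atomic add. A standalone illustration of the LP64 layout the comments describe (7 x 16-bit bucket counts, best constrained QoS above bit 112); a sketch, not the kernel code:

#include <stdint.h>
#include <stdio.h>

typedef unsigned __int128 wq_thactive_t; /* LP64 layout from the deleted code */

#define WQ_THACTIVE_BUCKET_WIDTH 16
#define WQ_THACTIVE_BUCKET_MASK  ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1)

/* Same trick as the removed _wq_thactive_offset_for_qos(): a "1" aligned
 * to the bucket's bit position, so += and -= adjust one counter. */
static wq_thactive_t
offset_for_qos(int qos)
{
        return (wq_thactive_t)1 << (qos * WQ_THACTIVE_BUCKET_WIDTH);
}

int main(void)
{
        wq_thactive_t v = 0;
        v += offset_for_qos(2);  /* one increment in bucket 2 ... */
        v += offset_for_qos(2);  /* ... twice */
        v += offset_for_qos(0);  /* and one in bucket 0 */
        for (int i = 0; i < 7; i++) {
                unsigned count = (unsigned)((v >> (i * WQ_THACTIVE_BUCKET_WIDTH))
                    & WQ_THACTIVE_BUCKET_MASK);
                printf("bucket %d: %u\n", i, count);
        }
        return 0;
}
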
 #pragma mark - Process/Thread Setup/Teardown syscalls
 
 static mach_vm_offset_t
@@ -445,41 +220,45 @@ stack_addr_hint(proc_t p, vm_map_t vmap)
        return stackaddr;
 }
 
+static bool
+_pthread_priority_to_policy(pthread_priority_t priority,
+               thread_qos_policy_data_t *data)
+{
+       data->qos_tier = _pthread_priority_thread_qos(priority);
+       data->tier_importance = _pthread_priority_relpri(priority);
+       if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 ||
+                       data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
+               return false;
+       }
+       return true;
+}
+
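Unlike the old inline computation it replaces (visible in the removed PTHREAD_START_QOSCLASS branch further down), this new helper rejects malformed priorities instead of clamping them, so callers now fail with EINVAL. The accepted range in plain terms, as a hedged mirror (THREAD_QOS_MIN_TIER_IMPORTANCE is -15 in xnu; treat that value as an assumption here):

#include <stdbool.h>

/* Mirror of the check in _pthread_priority_to_policy() above: a QoS
 * tier must be specified and relpri must lie in [-15, 0]. */
static bool
qos_policy_valid(int qos_tier /* THREAD_QOS_* */, int relpri)
{
        return qos_tier != 0 /* THREAD_QOS_UNSPECIFIED */
            && relpri <= 0
            && relpri >= -15; /* THREAD_QOS_MIN_TIER_IMPORTANCE, assumed */
}
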
 /**
  * bsdthread_create system call.  Used by pthread_create.
  */
 int
-_bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval)
+_bsdthread_create(struct proc *p,
+               __unused user_addr_t user_func, __unused user_addr_t user_funcarg,
+               user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags,
+               user_addr_t *retval)
 {
        kern_return_t kret;
        void * sright;
        int error = 0;
-       int allocated = 0;
-       mach_vm_offset_t stackaddr;
-       mach_vm_size_t th_allocsize = 0;
-       mach_vm_size_t th_guardsize;
-       mach_vm_offset_t th_stack;
-       mach_vm_offset_t th_pthread;
        mach_vm_offset_t th_tsd_base;
        mach_port_name_t th_thport;
        thread_t th;
-       vm_map_t vmap = pthread_kern->current_map();
        task_t ctask = current_task();
        unsigned int policy, importance;
        uint32_t tsd_offset;
-
-       int isLP64 = 0;
+       bool start_suspended = (flags & PTHREAD_START_SUSPENDED);
 
        if (pthread_kern->proc_get_register(p) == 0) {
                return EINVAL;
        }
 
 
-
-       isLP64 = proc_is64bit(p);
-       th_guardsize = vm_map_page_size(vmap);
+       PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0);
 
-       stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
        kret = pthread_kern->thread_create(ctask, &th);
        if (kret != KERN_SUCCESS)
                return(ENOMEM);
@@ -495,152 +274,64 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar
        }
 
        if ((flags & PTHREAD_START_CUSTOM) == 0) {
-               mach_vm_size_t pthread_size =
-                       vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(vmap));
-               th_allocsize = th_guardsize + user_stack + pthread_size;
-               user_stack += PTHREAD_T_OFFSET;
-
-               kret = mach_vm_map(vmap, &stackaddr,
-                               th_allocsize,
-                               page_size-1,
-                               VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
-                               0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
-                               VM_INHERIT_DEFAULT);
-               if (kret != KERN_SUCCESS){
-                       kret = mach_vm_allocate(vmap,
-                                       &stackaddr, th_allocsize,
-                                       VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
-               }
-               if (kret != KERN_SUCCESS) {
-                       error = ENOMEM;
-                       goto out;
-               }
-
-               PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);
-
-               allocated = 1;
-               /*
-                * The guard page is at the lowest address
-                * The stack base is the highest address
-                */
-               kret = mach_vm_protect(vmap,  stackaddr, th_guardsize, FALSE, VM_PROT_NONE);
-
-               if (kret != KERN_SUCCESS) {
-                       error = ENOMEM;
-                       goto out1;
-               }
-
-               th_pthread = stackaddr + th_guardsize + user_stack;
-               th_stack = th_pthread;
-
-               /*
-               * Pre-fault the first page of the new thread's stack and the page that will
-               * contain the pthread_t structure.
-               */
-               if (vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
-                               vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap))){
-                       vm_fault( vmap,
-                                       vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
-                                       VM_PROT_READ | VM_PROT_WRITE,
-                                       FALSE,
-                                       THREAD_UNINT, NULL, 0);
-               }
-
-               vm_fault( vmap,
-                               vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap)),
-                               VM_PROT_READ | VM_PROT_WRITE,
-                               FALSE,
-                               THREAD_UNINT, NULL, 0);
-
-       } else {
-               th_stack = user_stack;
-               th_pthread = user_pthread;
-
-               PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0);
+               error = EINVAL;
+               goto out;
        }
 
        }
 
+       PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3);
+
        tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
        if (tsd_offset) {
        tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
        if (tsd_offset) {
-               th_tsd_base = th_pthread + tsd_offset;
+               th_tsd_base = user_pthread + tsd_offset;
                kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
                if (kret == KERN_SUCCESS) {
                        flags |= PTHREAD_START_TSD_BASE_SET;
                }
        }
                kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
                if (kret == KERN_SUCCESS) {
                        flags |= PTHREAD_START_TSD_BASE_SET;
                }
        }
+       /*
+        * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel
+        * supports this flag (after the fact).
+        */
+       flags &= ~PTHREAD_START_SUSPENDED;
 
 
        /*
        /*
-        * Set up i386 registers & function call.
+        * Set up registers & function call.
         */
         */
-       if (isLP64 == 0) {
-               x86_thread_state32_t state = {
-                       .eip = (unsigned int)pthread_kern->proc_get_threadstart(p),
-                       .eax = (unsigned int)th_pthread,
-                       .ebx = (unsigned int)th_thport,
-                       .ecx = (unsigned int)user_func,
-                       .edx = (unsigned int)user_funcarg,
-                       .edi = (unsigned int)user_stack,
-                       .esi = (unsigned int)flags,
-                       /*
-                        * set stack pointer
-                        */
-                       .esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN))
+#if defined(__i386__) || defined(__x86_64__)
+       if (proc_is64bit_data(p)) {
+               x86_thread_state64_t state = {
+                       .rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
+                       .rdi = (uint64_t)user_pthread,
+                       .rsi = (uint64_t)th_thport,
+                       .rdx = (uint64_t)user_func,    /* golang wants this */
+                       .rcx = (uint64_t)user_funcarg, /* golang wants this */
+                       .r8  = (uint64_t)user_stack,   /* golang wants this */
+                       .r9  = (uint64_t)flags,
+
+                       .rsp = (uint64_t)(user_stack - C_64_REDZONE_LEN)
                };
 
-               error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
-               if (error != KERN_SUCCESS) {
-                       error = EINVAL;
-                       goto out;
-               }
+               (void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
        } else {
-               x86_thread_state64_t state64 = {
-                       .rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
-                       .rdi = (uint64_t)th_pthread,
-                       .rsi = (uint64_t)(th_thport),
-                       .rdx = (uint64_t)user_func,
-                       .rcx = (uint64_t)user_funcarg,
-                       .r8 = (uint64_t)user_stack,
-                       .r9 = (uint64_t)flags,
-                       /*
-                        * set stack pointer aligned to 16 byte boundary
-                        */
-                       .rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN)
+               x86_thread_state32_t state = {
+                       .eip = (uint32_t)pthread_kern->proc_get_threadstart(p),
+                       .eax = (uint32_t)user_pthread,
+                       .ebx = (uint32_t)th_thport,
+                       .ecx = (uint32_t)user_func,    /* golang wants this */
+                       .edx = (uint32_t)user_funcarg, /* golang wants this */
+                       .edi = (uint32_t)user_stack,   /* golang wants this */
+                       .esi = (uint32_t)flags,
+
+                       .esp = (int)((vm_offset_t)(user_stack - C_32_STK_ALIGN))
                };
 
-               error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
-               if (error != KERN_SUCCESS) {
-                       error = EINVAL;
-                       goto out;
-               }
-
+               (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
        }
-#elif defined(__arm__)
-       arm_thread_state_t state = {
-               .pc = (int)pthread_kern->proc_get_threadstart(p),
-               .r[0] = (unsigned int)th_pthread,
-               .r[1] = (unsigned int)th_thport,
-               .r[2] = (unsigned int)user_func,
-               .r[3] = (unsigned int)user_funcarg,
-               .r[4] = (unsigned int)user_stack,
-               .r[5] = (unsigned int)flags,
-
-               /* Set r7 & lr to 0 for better back tracing */
-               .r[7] = 0,
-               .lr = 0,
-
-               /*
-                * set stack pointer
-                */
-               .sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN))
-       };
-
-       (void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
-
 #else
 #error bsdthread_create  not defined for this architecture
 #endif
 
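The register assignments above follow the System V AMD64 (and i386 wq-state) argument order, so the new thread materializes as though libpthread's entry point had been called directly; the "golang wants this" comments exist because Go's runtime issues bsdthread_create itself and depends on func/arg arriving in %rdx/%rcx. Roughly, the user-space entry being targeted (signature paraphrased from src/pthread.c of this era, so treat the details as an approximation):

/* %rdi -> self, %rsi -> kport, %rdx -> fun, %rcx -> arg,
 * %r8 -> stack, %r9 -> flags (eax/ebx/ecx/edx/edi/esi on i386) */
void
_pthread_start(pthread_t self, mach_port_t kport,
                void *(*fun)(void *), void *arg,
                size_t stacksize, unsigned int flags);
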
-       if ((flags & PTHREAD_START_SETSCHED) != 0) {
+       if (flags & PTHREAD_START_SETSCHED) {
                /* Set scheduling parameters if needed */
                thread_extended_policy_data_t    extinfo;
                thread_precedence_policy_data_t   precedinfo;
@@ -658,16 +349,16 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar
 
                precedinfo.importance = (importance - BASEPRI_DEFAULT);
                thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
-       } else if ((flags & PTHREAD_START_QOSCLASS) != 0) {
+       } else if (flags & PTHREAD_START_QOSCLASS) {
                /* Set thread QoS class if requested. */
-               pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK);
-
                thread_qos_policy_data_t qos;
-               qos.qos_tier = pthread_priority_get_thread_qos(priority);
-               qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 :
-                               _pthread_priority_get_relpri(priority);
 
-               pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
+               if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) {
+                       error = EINVAL;
+                       goto out;
+               }
+               pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY,
+                               (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
        }
 
        if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
@@ -677,37 +368,33 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar
                        bool proc64bit = proc_is64bit(p);
                        if (proc64bit) {
                                uint64_t th_thport_tsd = (uint64_t)th_thport;
-                               error = copyout(&th_thport_tsd, th_pthread + tsd_offset +
+                               error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
                                                mach_thread_self_offset, sizeof(th_thport_tsd));
                        } else {
                                uint32_t th_thport_tsd = (uint32_t)th_thport;
-                               error = copyout(&th_thport_tsd, th_pthread + tsd_offset +
+                               error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
                                                mach_thread_self_offset, sizeof(th_thport_tsd));
                        }
                        if (error) {
-                               goto out1;
+                               goto out;
                        }
                }
        }
 
-       kret = pthread_kern->thread_resume(th);
-       if (kret != KERN_SUCCESS) {
-               error = EINVAL;
-               goto out1;
+       if (!start_suspended) {
+               kret = pthread_kern->thread_resume(th);
+               if (kret != KERN_SUCCESS) {
+                       error = EINVAL;
+                       goto out;
+               }
        }
        thread_deallocate(th);  /* drop the creator reference */
 
-       PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0);
-
-       // cast required as mach_vm_offset_t is always 64 bits even on 32-bit platforms
-       *retval = (user_addr_t)th_pthread;
+       PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_END, error, user_pthread, 0, 0);
 
+       *retval = user_pthread;
        return(0);
 
-out1:
-       if (allocated != 0) {
-               (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
-       }
 out:
        (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
        if (pthread_kern->thread_will_park_or_terminate) {
@@ -737,21 +424,24 @@ _bsdthread_terminate(__unused struct proc *p,
        freeaddr = (mach_vm_offset_t)stackaddr;
        freesize = size;
 
-       PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);
+       PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff);
 
        if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
                if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){
                        vm_map_t user_map = pthread_kern->current_map();
                        freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
                        kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
-                       assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
+#if MACH_ASSERT
+                       if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) {
+                               os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret);
+                       }
+#endif
                        kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
                        assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
                } else {
                        kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
                        if (kret != KERN_SUCCESS) {
-                               PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
-                               return(EINVAL);
+                               PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
                        }
                }
        }
@@ -761,10 +451,9 @@ _bsdthread_terminate(__unused struct proc *p,
        }
        (void)thread_terminate(th);
        if (sem != MACH_PORT_NULL) {
-                kret = pthread_kern->semaphore_signal_internal_trap(sem);
+               kret = pthread_kern->semaphore_signal_internal_trap(sem);
                if (kret != KERN_SUCCESS) {
-                       PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
-                       return(EINVAL);
+                       PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
                }
        }
 
@@ -772,14 +461,10 @@ _bsdthread_terminate(__unused struct proc *p,
                pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
        }
 
-       PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0);
+       PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0);
 
        pthread_kern->thread_exception_return();
-       panic("bsdthread_terminate: still running\n");
-
-       PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0);
-
-       return(0);
+       __builtin_unreachable();
 }
 
 /**
@@ -873,29 +558,35 @@ _bsdthread_register(struct proc *p,
        if (pthread_init_data != 0) {
                /* Outgoing data that userspace expects as a reply */
                data.version = sizeof(struct _pthread_registration_data);
+               data.main_qos = _pthread_unspecified_priority();
+
                if (pthread_kern->qos_main_thread_active()) {
                        mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
                        thread_qos_policy_data_t qos;
                        boolean_t gd = FALSE;
 
-                       kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
+                       kr = pthread_kern->thread_policy_get(current_thread(),
+                                       THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
                        if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
-                               /* Unspecified threads means the kernel wants us to impose legacy upon the thread. */
+                               /*
+                                * Unspecified threads means the kernel wants us
+                                * to impose legacy upon the thread.
+                                */
                                qos.qos_tier = THREAD_QOS_LEGACY;
                                qos.tier_importance = 0;
 
-                               kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
+                               kr = pthread_kern->thread_policy_set_internal(current_thread(),
+                                               THREAD_QOS_POLICY, (thread_policy_t)&qos,
+                                               THREAD_QOS_POLICY_COUNT);
                        }
 
                        if (kr == KERN_SUCCESS) {
-                               data.main_qos = thread_qos_get_pthread_priority(qos.qos_tier);
-                       } else {
-                               data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
+                               data.main_qos = _pthread_priority_make_from_thread_qos(
+                                               qos.qos_tier, 0, 0);
                        }
-               } else {
-                       data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
                }
 
+               data.stack_addr_hint = stackaddr;
                data.mutex_default_policy = pthread_mutex_default_policy;
 
                kr = copyout(&data, pthread_init_data, pthread_init_sz);
@@ -910,2858 +601,220 @@ _bsdthread_register(struct proc *p,
        return(0);
 }
 
-#pragma mark - QoS Manipulation
+
+#pragma mark - Workqueue Thread Support
+
+static mach_vm_size_t
+workq_thread_allocsize(proc_t p, vm_map_t wq_map,
+               mach_vm_size_t *guardsize_out)
+{
+       mach_vm_size_t guardsize = vm_map_page_size(wq_map);
+       mach_vm_size_t pthread_size = vm_map_round_page_mask(
+                       pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET,
+                       vm_map_page_mask(wq_map));
+       if (guardsize_out) *guardsize_out = guardsize;
+       return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
+}
 
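
The new workq_thread_allocsize() packs a guard page, the default stack, and a page-rounded pthread_t region into one allocation. A minimal standalone sketch of the same arithmetic follows; the page size, PTH_DEFAULT_STACKSIZE, PTHREAD_T_OFFSET, and proc_get_pthsize() values are illustrative assumptions, not the kernel's actual configuration:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_ASSUMED   (16u * 1024)   /* guard page == one VM page */
#define DEFAULT_STACKSIZE   (512u * 1024)  /* stand-in for PTH_DEFAULT_STACKSIZE */
#define PTHREAD_T_OFF       0u             /* stand-in for PTHREAD_T_OFFSET */

static uint64_t round_page(uint64_t x)
{
	return (x + PAGE_SIZE_ASSUMED - 1) & ~(uint64_t)(PAGE_SIZE_ASSUMED - 1);
}

int main(void)
{
	uint64_t pthsize = 8192;  /* assumed proc_get_pthsize() result */
	uint64_t alloc = PAGE_SIZE_ASSUMED                      /* guard page */
			+ DEFAULT_STACKSIZE                     /* usable stack */
			+ round_page(pthsize + PTHREAD_T_OFF);  /* pthread_t area */
	printf("per-thread allocation: %llu bytes\n", (unsigned long long)alloc);
	return 0;
}
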
 int
-_bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval)
+workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr)
 {
-       int rv;
-       thread_t th;
+       mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
+       mach_vm_size_t guardsize, th_allocsize;
+       kern_return_t kret;
 
-       pthread_priority_t priority;
+       th_allocsize = workq_thread_allocsize(p, vmap, &guardsize);
+       kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1,
+                       VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE,
+                       VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 
-       /* Unused parameters must be zero. */
-       if (arg3 != 0) {
-               return EINVAL;
+       if (kret != KERN_SUCCESS) {
+               kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize,
+                               VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
        }
 
-       /* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */
-       if (proc_is64bit(p)) {
-               uint64_t v;
-               rv = copyin(tsd_priority_addr, &v, sizeof(v));
-               if (rv) goto out;
-               priority = (int)(v & 0xffffffff);
-       } else {
-               uint32_t v;
-               rv = copyin(tsd_priority_addr, &v, sizeof(v));
-               if (rv) goto out;
-               priority = v;
+       if (kret != KERN_SUCCESS) {
+               goto fail;
        }
 
-       if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
-               return ESRCH;
+       /*
+        * The guard page is at the lowest address
+        * The stack base is the highest address
+        */
+       kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE);
+       if (kret != KERN_SUCCESS) {
+               goto fail_vm_deallocate;
        }
 
-       /* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */
-       if (th != current_thread()) {
-               thread_deallocate(th);
-               return EPERM;
+       if (out_addr) {
+               *out_addr = stackaddr;
        }
+       return 0;
 
-       rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval);
-
-       /* Static param the thread, we just set QoS on it, so its stuck in QoS land now. */
-       /* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744>, for details
-
-       thread_deallocate(th);
-
-out:
-       return rv;
+fail_vm_deallocate:
+       (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
+fail:
+       return kret;
 }
 
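workq_create_threadstack() above reserves the whole region first (mach_vm_map(), falling back to mach_vm_allocate()), then protects the lowest page as the guard, unwinding through fail_vm_deallocate on error. A hedged userspace analogue of that reserve-then-protect ordering, using POSIX mmap()/mprotect() in place of the mach_vm_* calls and an assumed 512 KiB stack:

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t guard = (size_t)sysconf(_SC_PAGESIZE);
	size_t stacksize = 512 * 1024;  /* assumed default stack size */
	size_t total = guard + stacksize;

	void *base = mmap(NULL, total, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANON, -1, 0);
	if (base == MAP_FAILED) { perror("mmap"); return 1; }

	/* guard page at the lowest address; the stack grows down toward it */
	if (mprotect(base, guard, PROT_NONE) != 0) {
		perror("mprotect");
		munmap(base, total);  /* mirrors the fail_vm_deallocate path */
		return 1;
	}
	printf("stack bottom %p, stack top %p\n",
			(char *)base + guard, (char *)base + total);
	munmap(base, total);
	return 0;
}
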
-static inline struct threadlist *
-util_get_thread_threadlist_entry(thread_t th)
+int
+workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr)
 {
-       struct uthread *uth = pthread_kern->get_bsdthread_info(th);
-       if (uth) {
-               struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
-               return tl;
-       }
-       return NULL;
+       return mach_vm_deallocate(vmap, stackaddr,
+                       workq_thread_allocsize(p, vmap, NULL));
 }
 
-boolean_t
-_workq_thread_has_been_unbound(thread_t th, int qos_class)
-{
-       struct threadlist *tl = util_get_thread_threadlist_entry(th);
-       if (!tl) {
-               return FALSE;
+void
+workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th,
+               vm_map_t vmap, user_addr_t stackaddr)
+{
+       // Keep this in sync with workq_setup_thread()
+       const vm_size_t       guardsize = vm_map_page_size(vmap);
+       const user_addr_t     freeaddr = (user_addr_t)stackaddr + guardsize;
+       const vm_map_offset_t freesize = vm_map_trunc_page_mask(
+                       (PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1,
+                       vm_map_page_mask(vmap)) - guardsize;
+
+       __assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr,
+                       freesize, VM_BEHAVIOR_REUSABLE);
+#if MACH_ASSERT
+       if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
+               os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
        }
+#endif
+}
 
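workq_markfree_threadstack() only marks the usable stack pages reusable: the range starts just above the guard page, and its length is truncated down to a page boundary before the guard is subtracted back out. A small sketch of just that address arithmetic, reusing the assumed constants from the earlier sketch:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_ASSUMED   (16u * 1024)
#define DEFAULT_STACKSIZE   (512u * 1024)
#define PTHREAD_T_OFF       0u

static uint64_t trunc_page(uint64_t x)
{
	return x & ~(uint64_t)(PAGE_SIZE_ASSUMED - 1);
}

int main(void)
{
	uint64_t stackaddr = 0x170000000ULL;  /* assumed allocation base */
	uint64_t guard = PAGE_SIZE_ASSUMED;
	uint64_t freeaddr = stackaddr + guard;
	uint64_t freesize =
			trunc_page((DEFAULT_STACKSIZE + guard + PTHREAD_T_OFF) - 1) - guard;
	printf("mark reusable: [0x%llx, 0x%llx)\n",
			(unsigned long long)freeaddr,
			(unsigned long long)(freeaddr + freesize));
	return 0;
}
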
-       struct workqueue *wq = tl->th_workq;
-       workqueue_lock_spin(wq);
-
-       if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-               goto failure;
-       } else if (qos_class != class_index_get_thread_qos(tl->th_priority)) {
-               goto failure;
-       }
+struct workq_thread_addrs {
+       user_addr_t self;
+       user_addr_t stack_bottom;
+       user_addr_t stack_top;
+};
 
-       if ((tl->th_flags & TH_LIST_KEVENT_BOUND)){
-               goto failure;
-       }
-       tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
+static inline void
+workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr)
+{
+       th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN);
+}
 
-       workqueue_unlock(wq);
-       return TRUE;
+static void
+workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr,
+                                          struct workq_thread_addrs *th_addrs)
+{
+       const vm_size_t guardsize = vm_map_page_size(map);
 
-failure:
-       workqueue_unlock(wq);
-       return FALSE;
+       th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE +
+                       guardsize + PTHREAD_T_OFFSET);
+       workq_thread_set_top_addr(th_addrs, th_addrs->self);
+       th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize);
 }
 
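workq_thread_get_addrs() derives all three user addresses from the allocation base: the pthread_t sits above the stack, the initial stack top is the pthread_t address rounded down to the stack alignment (addr & -C_WORKQ_STK_ALIGN), and the stack bottom sits just above the guard page. A sketch of the layout math, assuming a 16-byte C_WORKQ_STK_ALIGN and the same illustrative constants as above:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_ASSUMED   (16u * 1024)
#define DEFAULT_STACKSIZE   (512u * 1024)
#define PTHREAD_T_OFF       0u
#define STK_ALIGN           16u   /* assumed C_WORKQ_STK_ALIGN */

int main(void)
{
	uint64_t stackaddr = 0x170000000ULL;  /* assumed allocation base */
	uint64_t guard = PAGE_SIZE_ASSUMED;
	uint64_t self = stackaddr + DEFAULT_STACKSIZE + guard + PTHREAD_T_OFF;
	uint64_t stack_top = self & ~(uint64_t)(STK_ALIGN - 1);  /* addr & -ALIGN */
	uint64_t stack_bottom = stackaddr + guard;
	printf("self=0x%llx top=0x%llx bottom=0x%llx\n",
			(unsigned long long)self, (unsigned long long)stack_top,
			(unsigned long long)stack_bottom);
	return 0;
}
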
-int
-_bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval)
+static inline void
+workq_set_register_state(proc_t p, thread_t th,
+               struct workq_thread_addrs *addrs, mach_port_name_t kport,
+               user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count)
 {
-       thread_qos_policy_data_t qos;
-       mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
-       boolean_t gd = FALSE;
-       thread_t th = current_thread();
-       struct workqueue *wq = NULL;
-       struct threadlist *tl = NULL;
+       user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
+       if (!wqstart_fnptr) {
+               panic("workqueue thread start function pointer is NULL");
+       }
 
-       kern_return_t kr;
-       int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0;
+#if defined(__i386__) || defined(__x86_64__)
+       if (proc_is64bit_data(p) == 0) {
+               x86_thread_state32_t state = {
+                       .eip = (unsigned int)wqstart_fnptr,
+                       .eax = /* arg0 */ (unsigned int)addrs->self,
+                       .ebx = /* arg1 */ (unsigned int)kport,
+                       .ecx = /* arg2 */ (unsigned int)addrs->stack_bottom,
+                       .edx = /* arg3 */ (unsigned int)kevent_list,
+                       .edi = /* arg4 */ (unsigned int)upcall_flags,
+                       .esi = /* arg5 */ (unsigned int)kevent_count,
 
-       if ((flags & _PTHREAD_SET_SELF_WQ_KEVENT_UNBIND) != 0) {
-               tl = util_get_thread_threadlist_entry(th);
-               if (tl) {
-                       wq = tl->th_workq;
-               } else {
-                       goto qos;
+                       .esp = (int)((vm_offset_t)addrs->stack_top),
+               };
+
+               int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
+               if (error != KERN_SUCCESS) {
+                       panic(__func__ ": thread_set_wq_state failed: %d", error);
                }
+       } else {
+               x86_thread_state64_t state64 = {
+                       // x86-64 already passes all the arguments in registers, so we just put them in their final place here
+                       .rip = (uint64_t)wqstart_fnptr,
+                       .rdi = (uint64_t)addrs->self,
+                       .rsi = (uint64_t)kport,
+                       .rdx = (uint64_t)addrs->stack_bottom,
+                       .rcx = (uint64_t)kevent_list,
+                       .r8  = (uint64_t)upcall_flags,
+                       .r9  = (uint64_t)kevent_count,
 
-               workqueue_lock_spin(wq);
-               if (tl->th_flags & TH_LIST_KEVENT_BOUND) {
-                       tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
-                       unsigned int kevent_flags = KEVENT_FLAG_WORKQ | KEVENT_FLAG_UNBIND_CHECK_FLAGS;
-                       if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-                               kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER;
-                       }
+                       .rsp = (uint64_t)(addrs->stack_top)
+               };
 
-                       workqueue_unlock(wq);
-                       __assert_only int ret = kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags);
-                       assert(ret == 0);
-               } else {
-                       workqueue_unlock(wq);
+               int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
+               if (error != KERN_SUCCESS) {
+                       panic(__func__ ": thread_set_wq_state failed: %d", error);
                }
        }
+#else
+#error setup_wqthread  not defined for this architecture
+#endif
+}
 
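workq_set_register_state() hand-loads the thread's initial register file so it resumes in userspace as though the start routine had been called with six arguments. Purely to illustrate the register-to-parameter mapping above (this is not the declared libpthread prototype; the real entry point is whatever proc_get_wqthread() returns):

#include <stdint.h>

/* Illustrative only: one parameter per register slot set above. */
void wqthread_entry(void *self,        /* rdi / eax */
		uint32_t kport,        /* rsi / ebx */
		void *stack_bottom,    /* rdx / ecx */
		void *kevent_list,     /* rcx / edx */
		uint32_t upcall_flags, /* r8  / edi */
		int kevent_count)      /* r9  / esi */
{
	(void)self; (void)kport; (void)stack_bottom;
	(void)kevent_list; (void)upcall_flags; (void)kevent_count;
}
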
-qos:
-       if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
-               kr = pthread_kern->thread_policy_get(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
-               if (kr != KERN_SUCCESS) {
-                       qos_rv = EINVAL;
-                       goto voucher;
-               }
+static int
+workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs, int upcall_flags,
+               user_addr_t eventlist, int nevents, int kevent_flags,
+               user_addr_t *kevent_list_out, int *kevent_count_out)
+{
+       bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP;
+       int kevent_count = WQ_KEVENT_LIST_LEN;
+       user_addr_t kevent_list = th_addrs->self - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
+       user_addr_t kevent_id_addr = kevent_list;
+       kqueue_id_t kevent_id = -1;
+       int ret;
 
+       if (workloop) {
                /*
-                * If we have main-thread QoS then we don't allow a thread to come out
-                * of QOS_CLASS_UNSPECIFIED.
+                * The kevent ID goes just below the kevent list.  Sufficiently new
+                * userspace will know to look there.  Old userspace will just
+                * ignore it.
                 */
-               if (pthread_kern->qos_main_thread_active() && qos.qos_tier ==
-                               THREAD_QOS_UNSPECIFIED) {
-                       qos_rv = EPERM;
-                       goto voucher;
-               }
+               kevent_id_addr -= sizeof(kqueue_id_t);
+       }
 
-               if (!tl) {
-                       tl = util_get_thread_threadlist_entry(th);
-                       if (tl) wq = tl->th_workq;
-               }
+       user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE;
+       user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE;
 
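These buffers are carved downward from the pthread_t address: the kevent list first, then (for workloops) the kqueue_id_t slot described in the comment below, then the data buffer. A sketch of the carve-out, with assumed values for WQ_KEVENT_LIST_LEN, sizeof(struct kevent_qos_s), and WQ_KEVENT_DATA_SIZE:

#include <stdint.h>
#include <stdio.h>

#define KEVENT_LIST_LEN   16u          /* assumed WQ_KEVENT_LIST_LEN */
#define KEVENT_QOS_SIZE   64u          /* assumed sizeof(struct kevent_qos_s) */
#define KEVENT_DATA_SIZE  (32u * 1024) /* assumed WQ_KEVENT_DATA_SIZE */

int main(void)
{
	uint64_t self = 0x170090000ULL;  /* assumed pthread_t address */
	int workloop = 1;

	uint64_t kevent_list = self - (uint64_t)KEVENT_LIST_LEN * KEVENT_QOS_SIZE;
	uint64_t kevent_id_addr = kevent_list;
	if (workloop)  /* the id slot sits just below the kevent list */
		kevent_id_addr -= sizeof(uint64_t);  /* kqueue_id_t assumed 64-bit */
	uint64_t kevent_data_buf = kevent_id_addr - KEVENT_DATA_SIZE;

	printf("list=0x%llx id=0x%llx data=0x%llx\n",
			(unsigned long long)kevent_list,
			(unsigned long long)kevent_id_addr,
			(unsigned long long)kevent_data_buf);
	return 0;
}
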
-               PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0);
+       if (workloop) {
+               kevent_flags |= KEVENT_FLAG_WORKLOOP;
+               ret = kevent_id_internal(p, &kevent_id,
+                               eventlist, nevents, kevent_list, kevent_count,
+                               kevent_data_buf, &kevent_data_available,
+                               kevent_flags, &kevent_count);
+               copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id));
+       } else {
+               kevent_flags |= KEVENT_FLAG_WORKQ;
+               ret = kevent_qos_internal(p, -1, eventlist, nevents, kevent_list,
+                               kevent_count, kevent_data_buf, &kevent_data_available,
+                               kevent_flags, &kevent_count);
+       }
 
-               qos.qos_tier = pthread_priority_get_thread_qos(priority);
-               qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority);
+       // squash any errors into just empty output
+       if (ret != 0 || kevent_count == -1) {
+               *kevent_list_out = NULL;
+               *kevent_count_out = 0;
+               return ret;
+       }
 
-               if (qos.qos_tier == QOS_CLASS_UNSPECIFIED ||
-                               qos.tier_importance > 0 || qos.tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
-                       qos_rv = EINVAL;
-                       goto voucher;
-               }
-
-               /*
-                * If we're a workqueue, the threadlist item priority needs adjusting,
-                * along with the bucket we were running in.
-                */
-               if (tl) {
-                       bool try_run_threadreq = false;
-
-                       workqueue_lock_spin(wq);
-                       kr = pthread_kern->thread_set_workq_qos(th, qos.qos_tier, qos.tier_importance);
-                       assert(kr == KERN_SUCCESS || kr == KERN_TERMINATED);
-
-                       /* Fix up counters. */
-                       uint8_t old_bucket = tl->th_priority;
-                       uint8_t new_bucket = pthread_priority_get_class_index(priority);
-
-                       if (old_bucket != new_bucket) {
-                               _wq_thactive_move(wq, old_bucket, new_bucket);
-                               wq->wq_thscheduled_count[old_bucket]--;
-                               wq->wq_thscheduled_count[new_bucket]++;
-                               if (old_bucket == WORKQUEUE_EVENT_MANAGER_BUCKET ||
-                                               old_bucket < new_bucket) {
-                                       /*
-                                        * if the QoS of the thread was lowered, then this could
-                                        * allow for a higher QoS thread request to run, so we need
-                                        * to reevaluate.
-                                        */
-                                       try_run_threadreq = true;
-                               }
-                               tl->th_priority = new_bucket;
-                       }
-
-                       bool old_overcommit = !(tl->th_flags & TH_LIST_CONSTRAINED);
-                       bool new_overcommit = priority & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
-                       if (!old_overcommit && new_overcommit) {
-                               if (wq->wq_constrained_threads_scheduled-- ==
-                                               wq_max_constrained_threads) {
-                                       try_run_threadreq = true;
-                               }
-                               tl->th_flags &= ~TH_LIST_CONSTRAINED;
-                       } else if (old_overcommit && !new_overcommit) {
-                               wq->wq_constrained_threads_scheduled++;
-                               tl->th_flags |= TH_LIST_CONSTRAINED;
-                       }
-
-                       if (try_run_threadreq) {
-                               workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
-                       } else {
-                               workqueue_unlock(wq);
-                       }
-               } else {
-                       kr = pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
-                       if (kr != KERN_SUCCESS) {
-                               qos_rv = EINVAL;
-                       }
-               }
-
-               PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0);
-       }
-
-voucher:
-       if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) {
-               kr = pthread_kern->thread_set_voucher_name(voucher);
-               if (kr != KERN_SUCCESS) {
-                       voucher_rv = ENOENT;
-                       goto fixedpri;
-               }
-       }
-
-fixedpri:
-       if (qos_rv) goto done;
-       if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) {
-               thread_extended_policy_data_t extpol = {.timeshare = 0};
-
-               if (!tl) tl  = util_get_thread_threadlist_entry(th);
-               if (tl) {
-                       /* Not allowed on workqueue threads */
-                       fixedpri_rv = ENOTSUP;
-                       goto done;
-               }
-
-               kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
-               if (kr != KERN_SUCCESS) {
-                       fixedpri_rv = EINVAL;
-                       goto done;
-               }
-       } else if ((flags & _PTHREAD_SET_SELF_TIMESHARE_FLAG) != 0) {
-               thread_extended_policy_data_t extpol = {.timeshare = 1};
-
-               if (!tl) tl = util_get_thread_threadlist_entry(th);
-               if (tl) {
-                       /* Not allowed on workqueue threads */
-                       fixedpri_rv = ENOTSUP;
-                       goto done;
-               }
-
-               kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
-               if (kr != KERN_SUCCESS) {
-                       fixedpri_rv = EINVAL;
-                       goto done;
-               }
-       }
-
-done:
-       if (qos_rv && voucher_rv) {
-               /* Both failed, give that a unique error. */
-               return EBADMSG;
-       }
-
-       if (qos_rv) {
-               return qos_rv;
-       }
-
-       if (voucher_rv) {
-               return voucher_rv;
-       }
-
-       if (fixedpri_rv) {
-               return fixedpri_rv;
-       }
-
-       return 0;
-}
-
-int
-_bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
-{
-       thread_t th;
-       int rv = 0;
-
-       if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
-               return ESRCH;
-       }
-
-       int override_qos = pthread_priority_get_thread_qos(priority);
-
-       struct threadlist *tl = util_get_thread_threadlist_entry(th);
-       if (tl) {
-               PTHREAD_TRACE_WQ(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);
-       }
-
-       /* The only failure case here is if we pass a tid and have it lookup the thread, we pass the uthread, so this all always succeeds. */
-       pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE,
-                       resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE, USER_ADDR_NULL, MACH_PORT_NULL);
-       thread_deallocate(th);
-       return rv;
-}
-
-int
-_bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t resource, user_addr_t arg3, int __unused *retval)
-{
-       thread_t th;
-       int rv = 0;
-
-       if (arg3 != 0) {
-               return EINVAL;
-       }
-
-       if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
-               return ESRCH;
-       }
-
-       struct uthread *uth = pthread_kern->get_bsdthread_info(th);
-
-       struct threadlist *tl = util_get_thread_threadlist_entry(th);
-       if (tl) {
-               PTHREAD_TRACE_WQ(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 0, 0, 0);
-       }
-
-       pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE);
-
-       thread_deallocate(th);
-       return rv;
-}
-
-static int
-_bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, user_addr_t ulock_addr)
-{
-       thread_t th;
-       int rv = 0;
-
-       if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
-               return ESRCH;
-       }
-
-       int override_qos = pthread_priority_get_thread_qos(priority);
-
-       struct threadlist *tl = util_get_thread_threadlist_entry(th);
-       if (!tl) {
-               thread_deallocate(th);
-               return EPERM;
-       }
-
-       PTHREAD_TRACE_WQ(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);
-
-       rv = pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE,
-                       resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE, ulock_addr, kport);
-
-       thread_deallocate(th);
-       return rv;
-}
-
-int _bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused *p, user_addr_t __unused cmd,
-               mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
-{
-       return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, resource, USER_ADDR_NULL);
-}
-
-int
-_bsdthread_ctl_qos_override_dispatch(struct proc *p __unused, user_addr_t cmd __unused, mach_port_name_t kport, pthread_priority_t priority, user_addr_t ulock_addr, int __unused *retval)
-{
-       return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, USER_ADDR_NULL, ulock_addr);
-}
-
-int
-_bsdthread_ctl_qos_override_reset(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
-{
-       if (arg1 != 0 || arg2 != 0 || arg3 != 0) {
-               return EINVAL;
-       }
-
-       return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, 1 /* reset_all */, 0, 0, retval);
-}
-
-int
-_bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused *p, user_addr_t __unused cmd, int reset_all, user_addr_t resource, user_addr_t arg3, int __unused *retval)
-{
-       if ((reset_all && (resource != 0)) || arg3 != 0) {
-               return EINVAL;
-       }
-
-       thread_t th = current_thread();
-       struct uthread *uth = pthread_kern->get_bsdthread_info(th);
-       struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
-
-       if (!tl) {
-               return EPERM;
-       }
-
-       PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, 0, 0, 0, 0);
-
-       resource = reset_all ? THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD : resource;
-       pthread_kern->proc_usynch_thread_qos_reset_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
-
-       return 0;
-}
-
-static int
-_bsdthread_ctl_max_parallelism(struct proc __unused *p, user_addr_t __unused cmd,
-               int qos, unsigned long flags, int *retval)
-{
-       _Static_assert(QOS_PARALLELISM_COUNT_LOGICAL ==
-                       _PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL, "logical");
-       _Static_assert(QOS_PARALLELISM_REALTIME ==
-                       _PTHREAD_QOS_PARALLELISM_REALTIME, "realtime");
-
-       if (flags & ~(QOS_PARALLELISM_REALTIME | QOS_PARALLELISM_COUNT_LOGICAL)) {
-               return EINVAL;
-       }
-
-       if (flags & QOS_PARALLELISM_REALTIME) {
-               if (qos) {
-                       return EINVAL;
-               }
-       } else if (qos == THREAD_QOS_UNSPECIFIED || qos >= THREAD_QOS_LAST) {
-               return EINVAL;
-       }
-
-       *retval = pthread_kern->qos_max_parallelism(qos, flags);
-       return 0;
-}
-
-int
-_bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
-{
-       switch (cmd) {
-       case BSDTHREAD_CTL_SET_QOS:
-               return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
-       case BSDTHREAD_CTL_QOS_OVERRIDE_START:
-               return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
-       case BSDTHREAD_CTL_QOS_OVERRIDE_END:
-               return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
-       case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
-               return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval);
-       case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
-               return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
-       case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD:
-               return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
-       case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET:
-               return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, (int)arg1, arg2, arg3, retval);
-       case BSDTHREAD_CTL_SET_SELF:
-               return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval);
-       case BSDTHREAD_CTL_QOS_MAX_PARALLELISM:
-               return _bsdthread_ctl_max_parallelism(p, cmd, (int)arg1, (unsigned long)arg2, retval);
-       default:
-               return EINVAL;
-       }
-}
-
-#pragma mark - Workqueue Implementation
-
-#pragma mark wq_flags
-
-static inline uint32_t
-_wq_flags(struct workqueue *wq)
-{
-       return atomic_load_explicit(&wq->wq_flags, memory_order_relaxed);
-}
-
-static inline bool
-_wq_exiting(struct workqueue *wq)
-{
-       return _wq_flags(wq) & WQ_EXITING;
-}
-
-static inline uint32_t
-_wq_flags_or_orig(struct workqueue *wq, uint32_t v)
-{
-#if PTHREAD_INLINE_RMW_ATOMICS
-       uint32_t state;
-       do {
-               state = _wq_flags(wq);
-       } while (!OSCompareAndSwap(state, state | v, &wq->wq_flags));
-       return state;
-#else
-       return atomic_fetch_or_explicit(&wq->wq_flags, v, memory_order_relaxed);
-#endif
-}
-
-static inline uint32_t
-_wq_flags_and_orig(struct workqueue *wq, uint32_t v)
-{
-#if PTHREAD_INLINE_RMW_ATOMICS
-       uint32_t state;
-       do {
-               state = _wq_flags(wq);
-       } while (!OSCompareAndSwap(state, state & v, &wq->wq_flags));
-       return state;
-#else
-       return atomic_fetch_and_explicit(&wq->wq_flags, v, memory_order_relaxed);
-#endif
-}
-
-static inline bool
-WQ_TIMER_DELAYED_NEEDED(struct workqueue *wq)
-{
-       uint32_t oldflags, newflags;
-       do {
-               oldflags = _wq_flags(wq);
-               if (oldflags & (WQ_EXITING | WQ_ATIMER_DELAYED_RUNNING)) {
-                       return false;
-               }
-               newflags = oldflags | WQ_ATIMER_DELAYED_RUNNING;
-       } while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags));
-       return true;
-}
-
-static inline bool
-WQ_TIMER_IMMEDIATE_NEEDED(struct workqueue *wq)
-{
-       uint32_t oldflags, newflags;
-       do {
-               oldflags = _wq_flags(wq);
-               if (oldflags & (WQ_EXITING | WQ_ATIMER_IMMEDIATE_RUNNING)) {
-                       return false;
-               }
-               newflags = oldflags | WQ_ATIMER_IMMEDIATE_RUNNING;
-       } while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags));
-       return true;
-}
-
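
The removed WQ_TIMER_DELAYED_NEEDED()/WQ_TIMER_IMMEDIATE_NEEDED() helpers implement "set the RUNNING flag exactly once" with a CAS loop, so only one caller arms each thread_call. The same pattern in portable C11 atomics (atomic_compare_exchange_weak standing in for OSCompareAndSwap); the flag values are stand-ins:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define WQ_EXITING_BIT     0x1u  /* stand-in for WQ_EXITING */
#define TIMER_RUNNING_BIT  0x2u  /* stand-in for WQ_ATIMER_*_RUNNING */

/* Returns true iff this caller made the transition and must arm the timer. */
static bool timer_needed(_Atomic uint32_t *flags)
{
	uint32_t old = atomic_load_explicit(flags, memory_order_relaxed);
	do {
		if (old & (WQ_EXITING_BIT | TIMER_RUNNING_BIT))
			return false;  /* exiting, or someone else already armed it */
	} while (!atomic_compare_exchange_weak(flags, &old,
			old | TIMER_RUNNING_BIT));
	return true;
}

int main(void)
{
	_Atomic uint32_t wq_flags = 0;
	return timer_needed(&wq_flags) ? 0 : 1;  /* first caller wins */
}
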
-#pragma mark thread requests pacing
-
-static inline uint32_t
-_wq_pacing_shift_for_pri(int pri)
-{
-       return _wq_bucket_to_thread_qos(pri) - 1;
-}
-
-static inline int
-_wq_highest_paced_priority(struct workqueue *wq)
-{
-       uint8_t paced = wq->wq_paced;
-       int msb = paced ? 32 - __builtin_clz(paced) : 0; // fls(paced) == bit + 1
-       return WORKQUEUE_EVENT_MANAGER_BUCKET - msb;
-}
-
-static inline uint8_t
-_wq_pacing_bit_for_pri(int pri)
-{
-       return 1u << _wq_pacing_shift_for_pri(pri);
-}
-
-static inline bool
-_wq_should_pace_priority(struct workqueue *wq, int pri)
-{
-       return wq->wq_paced >= _wq_pacing_bit_for_pri(pri);
-}
-
-static inline void
-_wq_pacing_start(struct workqueue *wq, struct threadlist *tl)
-{
-       uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority);
-       assert((tl->th_flags & TH_LIST_PACING) == 0);
-       assert((wq->wq_paced & bit) == 0);
-       wq->wq_paced |= bit;
-       tl->th_flags |= TH_LIST_PACING;
-}
-
-static inline bool
-_wq_pacing_end(struct workqueue *wq, struct threadlist *tl)
-{
-       if (tl->th_flags & TH_LIST_PACING) {
-               uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority);
-               assert((wq->wq_paced & bit) != 0);
-               wq->wq_paced ^= bit;
-               tl->th_flags &= ~TH_LIST_PACING;
-               return wq->wq_paced < bit; // !_wq_should_pace_priority
-       }
-       return false;
-}
-
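
The removed pacing helpers encode the set of paced QoS buckets in one byte, one bit per thread QoS (1 << (qos - 1)), and recover the highest paced level with __builtin_clz used as an fls(). A compact sketch of the bit manipulation, with an assumed QoS value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t paced = 0;
	int qos = 3;  /* assumed thread QoS of the bucket being paced */

	uint8_t bit = (uint8_t)(1u << (qos - 1));  /* _wq_pacing_bit_for_pri */
	paced |= bit;                              /* _wq_pacing_start */

	int msb = paced ? 32 - __builtin_clz(paced) : 0;  /* fls(paced) */
	printf("highest paced QoS: %d\n", msb);

	paced ^= bit;                              /* _wq_pacing_end */
	printf("still paced at/above? %s\n", paced >= bit ? "yes" : "no");
	return 0;
}
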
-#pragma mark thread requests
-
-static void
-_threadreq_init_alloced(struct threadreq *req, int priority, int flags)
-{
-       assert((flags & TR_FLAG_ONSTACK) == 0);
-       req->tr_state = TR_STATE_NEW;
-       req->tr_priority = priority;
-       req->tr_flags = flags;
-}
-
-static void
-_threadreq_init_stack(struct threadreq *req, int priority, int flags)
-{
-       req->tr_state = TR_STATE_NEW;
-       req->tr_priority = priority;
-       req->tr_flags = flags | TR_FLAG_ONSTACK;
-}
-
-static void
-_threadreq_copy_prepare(struct workqueue *wq)
-{
-again:
-       if (wq->wq_cached_threadreq) {
-               return;
-       }
-
-       workqueue_unlock(wq);
-       struct threadreq *req = zalloc(pthread_zone_threadreq);
-       workqueue_lock_spin(wq);
-
-       if (wq->wq_cached_threadreq) {
-               /*
-                * We lost the race and someone left behind an extra threadreq for us
-                * to use.  Throw away our request and retry.
-                */
-               workqueue_unlock(wq);
-               zfree(pthread_zone_threadreq, req);
-               workqueue_lock_spin(wq);
-               goto again;
-       } else {
-               wq->wq_cached_threadreq = req;
-       }
-
-       assert(wq->wq_cached_threadreq);
-}
-
-static bool
-_threadreq_copy_prepare_noblock(struct workqueue *wq)
-{
-       if (wq->wq_cached_threadreq) {
-               return true;
-       }
-
-       wq->wq_cached_threadreq = zalloc_noblock(pthread_zone_threadreq);
-
-       return wq->wq_cached_threadreq != NULL;
-}
-
-static inline struct threadreq_head *
-_threadreq_list_for_req(struct workqueue *wq, const struct threadreq *req)
-{
-       if (req->tr_flags & TR_FLAG_OVERCOMMIT) {
-               return &wq->wq_overcommit_reqlist[req->tr_priority];
-       } else {
-               return &wq->wq_reqlist[req->tr_priority];
-       }
-}
-
-static void
-_threadreq_enqueue(struct workqueue *wq, struct threadreq *req)
-{
-       assert(req && req->tr_state == TR_STATE_NEW);
-       if (req->tr_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-               assert(wq->wq_event_manager_threadreq.tr_state != TR_STATE_WAITING);
-               memcpy(&wq->wq_event_manager_threadreq, req, sizeof(struct threadreq));
-               req = &wq->wq_event_manager_threadreq;
-               req->tr_flags &= ~(TR_FLAG_ONSTACK | TR_FLAG_NO_PACING);
-       } else {
-               if (req->tr_flags & TR_FLAG_ONSTACK) {
-                       assert(wq->wq_cached_threadreq);
-                       struct threadreq *newreq = wq->wq_cached_threadreq;
-                       wq->wq_cached_threadreq = NULL;
-
-                       memcpy(newreq, req, sizeof(struct threadreq));
-                       newreq->tr_flags &= ~(TR_FLAG_ONSTACK | TR_FLAG_NO_PACING);
-                       req->tr_state = TR_STATE_DEAD;
-                       req = newreq;
-               }
-               TAILQ_INSERT_TAIL(_threadreq_list_for_req(wq, req), req, tr_entry);
-       }
-       req->tr_state = TR_STATE_WAITING;
-       wq->wq_reqcount++;
-}
-
-static void
-_threadreq_dequeue(struct workqueue *wq, struct threadreq *req)
-{
-       if (req->tr_priority != WORKQUEUE_EVENT_MANAGER_BUCKET) {
-               struct threadreq_head *req_list = _threadreq_list_for_req(wq, req);
-#if DEBUG
-               struct threadreq *cursor = NULL;
-               TAILQ_FOREACH(cursor, req_list, tr_entry) {
-                       if (cursor == req) break;
-               }
-               assert(cursor == req);
-#endif
-               TAILQ_REMOVE(req_list, req, tr_entry);
-       }
-       wq->wq_reqcount--;
-}
-
-/*
- * Mark a thread request as complete.  At this point, it is treated as owned by
- * the submitting subsystem and you should assume it could be freed.
- *
- * Called with the workqueue lock held.
- */
-static int
-_threadreq_complete_and_unlock(proc_t p, struct workqueue *wq,
-               struct threadreq *req, struct threadlist *tl)
-{
-       struct threadreq *req_tofree = NULL;
-       bool sync = (req->tr_state == TR_STATE_NEW);
-       bool workloop = req->tr_flags & TR_FLAG_WORKLOOP;
-       bool onstack = req->tr_flags & TR_FLAG_ONSTACK;
-       bool kevent = req->tr_flags & TR_FLAG_KEVENT;
-       bool unbinding = tl->th_flags & TH_LIST_UNBINDING;
-       bool locked = true;
-       bool waking_parked_thread = (tl->th_flags & TH_LIST_BUSY);
-       int ret;
-
-       req->tr_state = TR_STATE_COMPLETE;
-
-       if (!workloop && !onstack && req != &wq->wq_event_manager_threadreq) {
-               if (wq->wq_cached_threadreq) {
-                       req_tofree = req;
-               } else {
-                       wq->wq_cached_threadreq = req;
-               }
-       }
-
-       if (tl->th_flags & TH_LIST_UNBINDING) {
-               tl->th_flags &= ~TH_LIST_UNBINDING;
-               assert((tl->th_flags & TH_LIST_KEVENT_BOUND));
-       } else if (workloop || kevent) {
-               assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
-               tl->th_flags |= TH_LIST_KEVENT_BOUND;
-       }
-
-       if (workloop) {
-               workqueue_unlock(wq);
-               ret = pthread_kern->workloop_fulfill_threadreq(wq->wq_proc, (void*)req,
-                               tl->th_thread, sync ? WORKLOOP_FULFILL_THREADREQ_SYNC : 0);
-               assert(ret == 0);
-               locked = false;
-       } else if (kevent) {
-               unsigned int kevent_flags = KEVENT_FLAG_WORKQ;
-               if (sync) {
-                       kevent_flags |= KEVENT_FLAG_SYNCHRONOUS_BIND;
-               }
-               if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-                       kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER;
-               }
-               workqueue_unlock(wq);
-               ret = kevent_qos_internal_bind(wq->wq_proc,
-                               class_index_get_thread_qos(tl->th_priority), tl->th_thread,
-                               kevent_flags);
-               if (ret != 0) {
-                       workqueue_lock_spin(wq);
-                       tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
-                       locked = true;
-               } else {
-                       locked = false;
-               }
-       }
-
-       /*
-        * Run Thread, Run!
-        */
-       PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 0, 0, 0, 0);
-       PTHREAD_TRACE_WQ_REQ(TRACE_wq_runitem | DBG_FUNC_START, wq, req, tl->th_priority,
-                       thread_tid(current_thread()), thread_tid(tl->th_thread));
-
-       if (waking_parked_thread) {
-               if (!locked) {
-                       workqueue_lock_spin(wq);
-               }
-               tl->th_flags &= ~(TH_LIST_BUSY);
-               if ((tl->th_flags & TH_LIST_REMOVING_VOUCHER) == 0) {
-                       /*
-                        * If the thread is in the process of removing its voucher, then it
-                        * isn't actually in the wait event yet and we don't need to wake
-                        * it up.  Save the trouble (and potential lock-ordering issues
-                        * (see 30617015)).
-                        */
-                       thread_wakeup_thread(tl, tl->th_thread);
-               }
-               workqueue_unlock(wq);
-
-               if (req_tofree) zfree(pthread_zone_threadreq, req_tofree);
-               return WQ_RUN_TR_THREAD_STARTED;
-       }
-
-       assert ((tl->th_flags & TH_LIST_PACING) == 0);
-       if (locked) {
-               workqueue_unlock(wq);
-       }
-       if (req_tofree) zfree(pthread_zone_threadreq, req_tofree);
-       if (unbinding) {
-               return WQ_RUN_TR_THREAD_STARTED;
-       }
-       _setup_wqthread(p, tl->th_thread, wq, tl, WQ_SETUP_CLEAR_VOUCHER);
-       pthread_kern->unix_syscall_return(EJUSTRETURN);
-       __builtin_unreachable();
-}
-
-/*
- * Mark a thread request as cancelled.  Has similar ownership semantics to the
- * complete call above.
- */
-static void
-_threadreq_cancel(struct workqueue *wq, struct threadreq *req)
-{
-       assert(req->tr_state == TR_STATE_WAITING);
-       req->tr_state = TR_STATE_DEAD;
-
-       assert((req->tr_flags & TR_FLAG_ONSTACK) == 0);
-       if (req->tr_flags & TR_FLAG_WORKLOOP) {
-               __assert_only int ret;
-               ret = pthread_kern->workloop_fulfill_threadreq(wq->wq_proc, (void*)req,
-                               THREAD_NULL, WORKLOOP_FULFILL_THREADREQ_CANCEL);
-               assert(ret == 0 || ret == ECANCELED);
-       } else if (req != &wq->wq_event_manager_threadreq) {
-               zfree(pthread_zone_threadreq, req);
-       }
-}
-
-#pragma mark workqueue lock
-
-static boolean_t workqueue_lock_spin_is_acquired_kdp(struct workqueue *wq) {
-  return kdp_lck_spin_is_acquired(&wq->wq_lock);
-}
-
-static void
-workqueue_lock_spin(struct workqueue *wq)
-{
-       assert(ml_get_interrupts_enabled() == TRUE);
-       lck_spin_lock(&wq->wq_lock);
-}
-
-static bool
-workqueue_lock_try(struct workqueue *wq)
-{
-       return lck_spin_try_lock(&wq->wq_lock);
-}
-
-static void
-workqueue_unlock(struct workqueue *wq)
-{
-       lck_spin_unlock(&wq->wq_lock);
-}
-
-#pragma mark workqueue add timer
-
-/**
- * Sets up the timer which will call out to workqueue_add_timer
- */
-static void
-workqueue_interval_timer_start(struct workqueue *wq)
-{
-       uint64_t deadline;
-
-       /* n.b. wq_timer_interval is reset to 0 in workqueue_add_timer if the
-        ATIMER_RUNNING flag is not present.  The net effect here is that if a
-        sequence of threads is required, we'll double the time before we give out
-        the next one. */
-       if (wq->wq_timer_interval == 0) {
-               wq->wq_timer_interval = wq_stalled_window_usecs;
-
-       } else {
-               wq->wq_timer_interval = wq->wq_timer_interval * 2;
-
-               if (wq->wq_timer_interval > wq_max_timer_interval_usecs) {
-                       wq->wq_timer_interval = wq_max_timer_interval_usecs;
-               }
-       }
-       clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);
-
-       PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount,
-                       _wq_flags(wq), wq->wq_timer_interval, 0);
-
-       thread_call_t call = wq->wq_atimer_delayed_call;
-       if (thread_call_enter1_delayed(call, call, deadline)) {
-               panic("delayed_call was already enqueued");
-       }
-}
-
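
The removed workqueue_interval_timer_start() doubles the interval on each consecutive arming and clamps it at wq_max_timer_interval_usecs; workqueue_add_timer() resets it to 0 whenever the timer fires without being rearmed. The backoff in isolation, with assumed defaults for the two tunables:

#include <stdint.h>
#include <stdio.h>

#define STALLED_WINDOW_USECS  200u          /* assumed wq_stalled_window_usecs */
#define MAX_TIMER_USECS       (100u * 1000) /* assumed wq_max_timer_interval_usecs */

static uint32_t next_interval(uint32_t cur)
{
	if (cur == 0)
		return STALLED_WINDOW_USECS;  /* first arming */
	cur *= 2;                             /* back off */
	return cur > MAX_TIMER_USECS ? MAX_TIMER_USECS : cur;
}

int main(void)
{
	uint32_t iv = 0;
	for (int i = 0; i < 12; i++) {
		iv = next_interval(iv);
		printf("arming #%d: %u us\n", i + 1, iv);
	}
	return 0;
}
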
-/**
- * Immediately trigger the workqueue_add_timer
- */
-static void
-workqueue_interval_timer_trigger(struct workqueue *wq)
-{
-       PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount,
-                       _wq_flags(wq), 0, 0);
-
-       thread_call_t call = wq->wq_atimer_immediate_call;
-       if (thread_call_enter1(call, call)) {
-               panic("immediate_call was already enqueued");
-       }
-}
-
-/**
- * returns whether lastblocked_tsp is within wq_stalled_window_usecs of cur_ts
- */
-static boolean_t
-wq_thread_is_busy(uint64_t cur_ts, _Atomic uint64_t *lastblocked_tsp)
-{
-       clock_sec_t     secs;
-       clock_usec_t    usecs;
-       uint64_t lastblocked_ts;
-       uint64_t elapsed;
-
-       lastblocked_ts = atomic_load_explicit(lastblocked_tsp, memory_order_relaxed);
-       if (lastblocked_ts >= cur_ts) {
-               /*
-                * because the update of the timestamp when a thread blocks isn't
-                * serialized against us looking at it (i.e. we don't hold the workq lock)
-                * it's possible to have a timestamp that matches the current time or
-                * that even looks to be in the future relative to when we grabbed the current
-                * time... just treat this as a busy thread since it must have just blocked.
-                */
-               return (TRUE);
-       }
-       elapsed = cur_ts - lastblocked_ts;
-
-       pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);
-
-       return (secs == 0 && usecs < wq_stalled_window_usecs);
-}
-
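
wq_thread_is_busy() above deliberately treats a timestamp at or ahead of "now" as busy, because the blocked-thread timestamp is written without the workq lock and can race the read. The same guard in plain C, working directly in microseconds rather than converting from absolute time:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define STALLED_WINDOW_USECS 200u  /* assumed wq_stalled_window_usecs */

static bool thread_is_busy(uint64_t now_us, uint64_t lastblocked_us)
{
	if (lastblocked_us >= now_us)
		return true;  /* racy read may look current or future: call it busy */
	return (now_us - lastblocked_us) < STALLED_WINDOW_USECS;
}

int main(void)
{
	printf("%d %d %d\n",
			thread_is_busy(1000, 1000),  /* 1: timestamp == now */
			thread_is_busy(1000, 900),   /* 1: blocked within the window */
			thread_is_busy(1000, 500));  /* 0: stalled */
	return 0;
}
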
-/**
- * handler function for the timer
- */
-static void
-workqueue_add_timer(struct workqueue *wq, thread_call_t thread_call_self)
-{
-       proc_t p = wq->wq_proc;
-
-       workqueue_lock_spin(wq);
-
-       PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_START, wq,
-                       _wq_flags(wq), wq->wq_nthreads, wq->wq_thidlecount, 0);
-
-       /*
-        * There's two tricky issues here.
-        *
-        * First issue: we start the thread_call's that invoke this routine without
-        * the workqueue lock held.  The scheduler callback needs to trigger
-        * reevaluation of the number of running threads but shouldn't take that
-        * lock, so we can't use it to synchronize state around the thread_call.
-        * As a result, it might re-enter the thread_call while this routine is
-        * already running.  This could cause it to fire a second time and we'll
-        * have two add_timers running at once.  Obviously, we don't want that to
-        * keep stacking, so we need to keep it at two timers.
-        *
-        * Solution: use wq_flags (accessed via atomic CAS) to synchronize the
-        * enqueue of the thread_call itself.  When a thread needs to trigger the
-        * add_timer, it checks for ATIMER_DELAYED_RUNNING and, when not set, sets
-        * the flag then does a thread_call_enter.  We'll then remove that flag
-        * only once we've got the lock and it's safe for the thread_call to be
-        * entered again.
-        *
-        * Second issue: we need to make sure that the two timers don't execute this
-        * routine concurrently.  We can't use the workqueue lock for this because
-        * we'll need to drop it during our execution.
-        *
-        * Solution: use WQL_ATIMER_BUSY as a condition variable to indicate that
-        * we are currently executing the routine and the next thread should wait.
-        *
-        * After all that, we arrive at the following four possible states:
-        * !WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY       no pending timer, no active timer
-        * !WQ_ATIMER_DELAYED_RUNNING &&  WQL_ATIMER_BUSY       no pending timer,  1 active timer
-        *  WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY        1 pending timer, no active timer
-        *  WQ_ATIMER_DELAYED_RUNNING &&  WQL_ATIMER_BUSY        1 pending timer,  1 active timer
-        *
-        * Further complication: sometimes we need to trigger this function to run
-        * without delay.  Because we aren't under a lock between setting
-        * WQ_ATIMER_DELAYED_RUNNING and calling thread_call_enter, we can't simply
-        * re-enter the thread call: if thread_call_enter() returned false, we
-        * wouldn't be able to distinguish the case where the thread_call had
-        * already fired from the case where it hadn't been entered yet from the
-        * other thread.  So, we use a separate thread_call for immediate
-        * invocations, and a separate RUNNING flag, WQ_ATIMER_IMMEDIATE_RUNNING.
-        */
-
-       while (wq->wq_lflags & WQL_ATIMER_BUSY) {
-               wq->wq_lflags |= WQL_ATIMER_WAITING;
-
-               assert_wait((caddr_t)wq, (THREAD_UNINT));
-               workqueue_unlock(wq);
-
-               thread_block(THREAD_CONTINUE_NULL);
-
-               workqueue_lock_spin(wq);
-       }
-       /*
-        * Prevent _workqueue_mark_exiting() from going away
-        */
-       wq->wq_lflags |= WQL_ATIMER_BUSY;
-
-       /*
-        * Decide which timer we are and remove the RUNNING flag.
-        */
-       if (thread_call_self == wq->wq_atimer_delayed_call) {
-               uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING);
-               if ((wq_flags & WQ_ATIMER_DELAYED_RUNNING) == 0) {
-                       panic("workqueue_add_timer(delayed) w/o WQ_ATIMER_DELAYED_RUNNING");
-               }
-       } else if (thread_call_self == wq->wq_atimer_immediate_call) {
-               uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING);
-               if ((wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) == 0) {
-                       panic("workqueue_add_timer(immediate) w/o WQ_ATIMER_IMMEDIATE_RUNNING");
-               }
-       } else {
-               panic("workqueue_add_timer can't figure out which timer it is");
-       }
-
-       int ret = WQ_RUN_TR_THREAD_STARTED;
-       while (ret == WQ_RUN_TR_THREAD_STARTED && wq->wq_reqcount) {
-               ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
-
-               workqueue_lock_spin(wq);
-       }
-       _threadreq_copy_prepare(wq);
-
-       /*
-        * If we called WQ_TIMER_NEEDED above, then this flag will be set if that
-        * call marked the timer running.  If so, we let the timer interval grow.
-        * Otherwise, we reset it back to 0.
-        */
-       uint32_t wq_flags = _wq_flags(wq);
-       if (!(wq_flags & WQ_ATIMER_DELAYED_RUNNING)) {
-               wq->wq_timer_interval = 0;
-       }
-
-       wq->wq_lflags &= ~WQL_ATIMER_BUSY;
-
-       if ((wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
-               /*
-                * wakeup the thread hung up in _workqueue_mark_exiting or
-                * workqueue_add_timer waiting for this timer to finish getting out of
-                * the way
-                */
-               wq->wq_lflags &= ~WQL_ATIMER_WAITING;
-               wakeup(wq);
-       }
-
-       PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_END, wq, 0, wq->wq_nthreads, wq->wq_thidlecount, 0);
-
-       workqueue_unlock(wq);
-}
-
-#pragma mark thread state tracking
-
-// called by spinlock code when trying to yield to lock owner
-void
-_workqueue_thread_yielded(void)
-{
-}
-
-static void
-workqueue_callback(int type, thread_t thread)
-{
-       struct uthread *uth = pthread_kern->get_bsdthread_info(thread);
-       struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
-       struct workqueue *wq = tl->th_workq;
-       uint32_t old_count, req_qos, qos = tl->th_priority;
-       wq_thactive_t old_thactive;
-
-       switch (type) {
-       case SCHED_CALL_BLOCK: {
-               bool start_timer = false;
-
-               old_thactive = _wq_thactive_dec(wq, tl->th_priority);
-               req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive);
-               old_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive,
-                               qos, NULL, NULL);
-
-               if (old_count == wq_max_concurrency[tl->th_priority]) {
-                       /*
-                        * The number of active threads at this priority has fallen below
-                        * the maximum number of concurrent threads that are allowed to run
-                        *
-                        * if we collide with another thread trying to update the
-                        * last_blocked (really unlikely since another thread would have to
-                        * get scheduled and then block after we start down this path), it's
-                        * not a problem.  Either timestamp is adequate, so no need to retry
-                        */
-                       atomic_store_explicit(&wq->wq_lastblocked_ts[qos],
-                                       mach_absolute_time(), memory_order_relaxed);
-               }
-
-               if (req_qos == WORKQUEUE_EVENT_MANAGER_BUCKET || qos > req_qos) {
-                       /*
-                        * The blocking thread is at a lower QoS than the highest currently
-                        * pending constrained request, nothing has to be redriven
-                        */
-               } else {
-                       uint32_t max_busycount, old_req_count;
-                       old_req_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive,
-                                       req_qos, NULL, &max_busycount);
-                       /*
-                        * If it is possible that may_start_constrained_thread had refused
-                        * admission due to being over the max concurrency, we may need to
-                        * spin up a new thread.
-                        *
-                        * We take into account the maximum number of busy threads
-                        * that can affect may_start_constrained_thread as looking at the
-                        * actual number may_start_constrained_thread will see is racy.
-                        *
-                        * IOW at NCPU = 4, for IN (req_qos = 1), if the old req count is
-                        * between NCPU (4) and NCPU - 2 (2) we need to redrive.
-                        */
-                       if (wq_max_concurrency[req_qos] <= old_req_count + max_busycount &&
-                                       old_req_count <= wq_max_concurrency[req_qos]) {
-                               if (WQ_TIMER_DELAYED_NEEDED(wq)) {
-                                       start_timer = true;
-                                       workqueue_interval_timer_start(wq);
-                               }
-                       }
-               }
-
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_START, wq,
-                               old_count - 1, qos | (req_qos << 8),
-                               wq->wq_reqcount << 1 | start_timer, 0);
-               break;
-       }
-       case SCHED_CALL_UNBLOCK: {
-               /*
-                * we cannot take the workqueue_lock here...
-                * an UNBLOCK can occur from a timer event which
-                * is run from an interrupt context... if the workqueue_lock
-                * is already held by this processor, we'll deadlock...
-                * the thread lock for the thread being UNBLOCKED
-                * is also held
-                */
-               old_thactive = _wq_thactive_inc(wq, qos);
-               if (pthread_debug_tracing) {
-                       req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive);
-                       old_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive,
-                                       qos, NULL, NULL);
-                       PTHREAD_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_END, wq,
-                                       old_count + 1, qos | (req_qos << 8),
-                                       wq->wq_threads_scheduled, 0);
-               }
-               break;
-       }
-       }
-}
-
-sched_call_t
-_workqueue_get_sched_callback(void)
-{
-       return workqueue_callback;
-}
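
The callback returned above is what drives the BLOCK/UNBLOCK accounting in workqueue_callback(). A minimal sketch of that pattern, with hypothetical sketch_* names and the QoS bucket passed as a plain argument rather than derived from the thread (the real sched_call_t receives a thread_t):

    #include <stdatomic.h>
    #include <stdint.h>

    #define SKETCH_NUM_BUCKETS   6   /* stand-in for WORKQUEUE_NUM_BUCKETS */
    #define SKETCH_CALL_BLOCK    1   /* illustrative type values */
    #define SKETCH_CALL_UNBLOCK  2

    static _Atomic uint32_t sketch_thactive[SKETCH_NUM_BUCKETS];

    /* Invoked by the scheduler when a workqueue thread blocks or unblocks;
     * UNBLOCK may run from interrupt context, so only lock-free atomics
     * are safe here. */
    static void
    sketch_workqueue_callback(int type, int qos)
    {
            switch (type) {
            case SKETCH_CALL_BLOCK:
                    /* one fewer thread actively running at this QoS */
                    atomic_fetch_sub(&sketch_thactive[qos], 1);
                    break;
            case SKETCH_CALL_UNBLOCK:
                    /* thread is runnable again */
                    atomic_fetch_add(&sketch_thactive[qos], 1);
                    break;
            }
    }
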
-
-#pragma mark thread addition/removal
-
-static mach_vm_size_t
-_workqueue_allocsize(struct workqueue *wq)
-{
-       proc_t p = wq->wq_proc;
-       mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
-       mach_vm_size_t pthread_size =
-               vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map));
-       return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
-}
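
For concreteness, a worked sketch of this computation under assumed values: 4 KiB pages and a 512 KiB default stack (the real numbers come from vm_map_page_size() and PTH_DEFAULT_STACKSIZE, neither of which is fixed by this hunk):

    #include <stdint.h>

    #define SK_PAGE_SIZE    4096u           /* assumed vm_map_page_size() */
    #define SK_STACK_SIZE   (512u * 1024u)  /* assumed PTH_DEFAULT_STACKSIZE */
    #define SK_PTHREAD_SIZE 4096u           /* pthsize + PTHREAD_T_OFFSET, rounded */

    /* guard page (lowest address) + stack + pthread_t area (highest address);
     * here 4096 + 524288 + 4096 = 532480 bytes per workqueue thread. */
    static uint64_t
    sketch_workqueue_allocsize(void)
    {
            return SK_PAGE_SIZE + SK_STACK_SIZE + SK_PTHREAD_SIZE;
    }
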
-
-/**
- * pop goes the thread
- *
- * If fromexit is set, the call is from workqueue_exit(),
- * so some cleanups are to be avoided.
- */
-static void
-workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use)
-{
-       struct uthread * uth;
-       struct workqueue * wq = tl->th_workq;
-
-       if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){
-               TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
-       } else {
-               TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
-       }
-
-       if (fromexit == 0) {
-               assert(wq->wq_nthreads && wq->wq_thidlecount);
-               wq->wq_nthreads--;
-               wq->wq_thidlecount--;
-       }
-
-       /*
-        * Clear the threadlist pointer in the uthread so that a blocked
-        * thread, on wakeup for termination, will not access the threadlist
-        * as it is about to be freed.
-        */
-       pthread_kern->thread_sched_call(tl->th_thread, NULL);
-
-       uth = pthread_kern->get_bsdthread_info(tl->th_thread);
-       if (uth != (struct uthread *)0) {
-               pthread_kern->uthread_set_threadlist(uth, NULL);
-       }
-       if (fromexit == 0) {
-               /* during exit the lock is not held */
-               workqueue_unlock(wq);
-       }
-
-       if ( (tl->th_flags & TH_LIST_NEW) || first_use ) {
-               /*
-                * thread was created, but never used...
-                * need to clean up the stack and port ourselves
-                * since we're not going to spin up through the
-                * normal exit path triggered from Libc
-                */
-               if (fromexit == 0) {
-                       /* vm map is already deallocated when this is called from exit */
-                       (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, _workqueue_allocsize(wq));
-               }
-               (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport);
-       }
-       /*
-        * drop our ref on the thread
-        */
-       thread_deallocate(tl->th_thread);
-
-       zfree(pthread_zone_threadlist, tl);
-}
-
-
-/**
- * Try to add a new workqueue thread.
- *
- * - called with workq lock held
- * - dropped and retaken around thread creation
- * - return with workq lock held
- */
-static bool
-workqueue_addnewthread(proc_t p, struct workqueue *wq)
-{
-       kern_return_t kret;
-
-       wq->wq_nthreads++;
-
-       workqueue_unlock(wq);
-
-       struct threadlist *tl = zalloc(pthread_zone_threadlist);
-       bzero(tl, sizeof(struct threadlist));
-
-       thread_t th;
-       kret = pthread_kern->thread_create_workq_waiting(wq->wq_task, wq_unpark_continue, tl, &th);
-       if (kret != KERN_SUCCESS) {
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 0, 0, 0);
-               goto fail_free;
-       }
-
-       mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
-
-       mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
-       mach_vm_size_t pthread_size =
-               vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map));
-       mach_vm_size_t th_allocsize = guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
-
-       kret = mach_vm_map(wq->wq_map, &stackaddr,
-                       th_allocsize, page_size-1,
-                       VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL,
-                       0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
-                       VM_INHERIT_DEFAULT);
-
-       if (kret != KERN_SUCCESS) {
-               kret = mach_vm_allocate(wq->wq_map,
-                               &stackaddr, th_allocsize,
-                               VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
-       }
-
-       if (kret != KERN_SUCCESS) {
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 1, 0, 0);
-               goto fail_terminate;
-       }
-
-       /*
-        * The guard page is at the lowest address
-        * The stack base is the highest address
-        */
-       kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE);
-       if (kret != KERN_SUCCESS) {
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 2, 0, 0);
-               goto fail_vm_deallocate;
-       }
-
-
-       pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD | THREAD_TAG_WORKQUEUE);
-       pthread_kern->thread_static_param(th, TRUE);
-
-       /*
-        * convert_thread_to_port() consumes a reference
-        */
-       thread_reference(th);
-       void *sright = (void *)pthread_kern->convert_thread_to_port(th);
-       tl->th_thport = pthread_kern->ipc_port_copyout_send(sright,
-                       pthread_kern->task_get_ipcspace(wq->wq_task));
-
-       tl->th_flags = TH_LIST_INITED | TH_LIST_NEW;
-       tl->th_thread = th;
-       tl->th_workq = wq;
-       tl->th_stackaddr = stackaddr;
-       tl->th_priority = WORKQUEUE_NUM_BUCKETS;
-
-       struct uthread *uth;
-       uth = pthread_kern->get_bsdthread_info(tl->th_thread);
-
-       workqueue_lock_spin(wq);
-
-       void *current_tl = pthread_kern->uthread_get_threadlist(uth);
-       if (current_tl == NULL) {
-               pthread_kern->uthread_set_threadlist(uth, tl);
-               TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
-               wq->wq_thidlecount++;
-       } else if (current_tl == WQ_THREADLIST_EXITING_POISON) {
-               /*
-                * Failed thread creation race: The thread already woke up and has exited.
-                */
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 3, 0, 0);
-               goto fail_unlock;
-       } else {
-               panic("Unexpected initial threadlist value");
-       }
-
-       PTHREAD_TRACE_WQ(TRACE_wq_thread_create | DBG_FUNC_NONE, wq, 0, 0, 0, 0);
-
-       return (TRUE);
-
-fail_unlock:
-       workqueue_unlock(wq);
-       (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task),
-                       tl->th_thport);
-
-fail_vm_deallocate:
-       (void) mach_vm_deallocate(wq->wq_map, stackaddr, th_allocsize);
-
-fail_terminate:
-       if (pthread_kern->thread_will_park_or_terminate) {
-               pthread_kern->thread_will_park_or_terminate(th);
-       }
-       (void)thread_terminate(th);
-       thread_deallocate(th);
-
-fail_free:
-       zfree(pthread_zone_threadlist, tl);
-
-       workqueue_lock_spin(wq);
-       wq->wq_nthreads--;
-
-       return (FALSE);
-}
-
-/**
- * Setup per-process state for the workqueue.
- */
-int
-_workq_open(struct proc *p, __unused int32_t *retval)
-{
-       struct workqueue * wq;
-       char * ptr;
-       uint32_t num_cpus;
-       int error = 0;
-
-       if (pthread_kern->proc_get_register(p) == 0) {
-               return EINVAL;
-       }
-
-       num_cpus = pthread_kern->ml_get_max_cpus();
-
-       if (wq_init_constrained_limit) {
-               uint32_t limit;
-               /*
-                * Set up the limit for the constrained pool.  This is a
-                * virtual pool in that we don't maintain it on separate
-                * idle and run lists.
-                */
-               limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;
-
-               if (limit > wq_max_constrained_threads)
-                       wq_max_constrained_threads = limit;
-
-               wq_init_constrained_limit = 0;
-
-               if (wq_max_threads > WQ_THACTIVE_BUCKET_HALF) {
-                       wq_max_threads = WQ_THACTIVE_BUCKET_HALF;
-               }
-               if (wq_max_threads > pthread_kern->config_thread_max - 20) {
-                       wq_max_threads = pthread_kern->config_thread_max - 20;
-               }
-       }
-
-       if (pthread_kern->proc_get_wqptr(p) == NULL) {
-               if (pthread_kern->proc_init_wqptr_or_wait(p) == FALSE) {
-                       assert(pthread_kern->proc_get_wqptr(p) != NULL);
-                       goto out;
-               }
-
-               ptr = (char *)zalloc(pthread_zone_workqueue);
-               bzero(ptr, sizeof(struct workqueue));
-
-               wq = (struct workqueue *)ptr;
-               wq->wq_proc = p;
-               wq->wq_task = current_task();
-               wq->wq_map  = pthread_kern->current_map();
-
-               // Start the event manager at the priority hinted at by the policy engine
-               int mgr_priority_hint = pthread_kern->task_get_default_manager_qos(current_task());
-               wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(mgr_priority_hint) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-
-               TAILQ_INIT(&wq->wq_thrunlist);
-               TAILQ_INIT(&wq->wq_thidlelist);
-               for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
-                       TAILQ_INIT(&wq->wq_overcommit_reqlist[i]);
-                       TAILQ_INIT(&wq->wq_reqlist[i]);
-               }
-
-               wq->wq_atimer_delayed_call =
-                               thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer,
-                                               (thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL);
-               wq->wq_atimer_immediate_call =
-                               thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer,
-                                               (thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL);
-
-               lck_spin_init(&wq->wq_lock, pthread_lck_grp, pthread_lck_attr);
-
-               wq->wq_cached_threadreq = zalloc(pthread_zone_threadreq);
-               *(wq_thactive_t *)&wq->wq_thactive =
-                               (wq_thactive_t)WQ_THACTIVE_NO_PENDING_REQUEST <<
-                               WQ_THACTIVE_QOS_SHIFT;
-
-               pthread_kern->proc_set_wqptr(p, wq);
-
-       }
-out:
-
-       return(error);
-}
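
A small sketch of the constrained-pool sizing performed on first open; the factor of 5 is an assumption drawn from contemporaneous kern_internal.h, not something this hunk shows:

    #include <stdint.h>

    #define SK_CONSTRAINED_FACTOR 5u  /* assumed WORKQUEUE_CONSTRAINED_FACTOR */

    /* Mirrors the one-time limit setup above: the computed limit only ever
     * raises the configured maximum, e.g. 8 CPUs -> 40 constrained threads. */
    static uint32_t
    sketch_constrained_limit(uint32_t num_cpus, uint32_t wq_max_constrained)
    {
            uint32_t limit = num_cpus * SK_CONSTRAINED_FACTOR;
            return (limit > wq_max_constrained) ? limit : wq_max_constrained;
    }
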
-
-/*
- * Routine:    workqueue_mark_exiting
- *
- * Function:   Mark the work queue such that new threads will not be added to the
- *             work queue after we return.
- *
- * Conditions: Called against the current process.
- */
-void
-_workqueue_mark_exiting(struct proc *p)
-{
-       struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
-       if (!wq) return;
-
-       PTHREAD_TRACE_WQ(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
-
-       workqueue_lock_spin(wq);
-
-       /*
-        * We arm the add timer without holding the workqueue lock so we need
-        * to synchronize with any running or soon to be running timers.
-        *
-        * Threads that intend to arm the timer atomically OR
-        * WQ_ATIMER_{DELAYED,IMMEDIATE}_RUNNING into the wq_flags, only if
-        * WQ_EXITING is not present.  So, once we have set WQ_EXITING, we can
-        * be sure that no new RUNNING flags will be set, but still need to
-        * wait for the already running timers to complete.
-        *
-        * We always hold the workq lock when dropping WQ_ATIMER_RUNNING, so
-        * the check for and sleep until clear is protected.
-        */
-       uint64_t wq_flags = _wq_flags_or_orig(wq, WQ_EXITING);
-
-       if (wq_flags & WQ_ATIMER_DELAYED_RUNNING) {
-               if (thread_call_cancel(wq->wq_atimer_delayed_call) == TRUE) {
-                       wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING);
-               }
-       }
-       if (wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) {
-               if (thread_call_cancel(wq->wq_atimer_immediate_call) == TRUE) {
-                       wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING);
-               }
-       }
-       while ((_wq_flags(wq) & (WQ_ATIMER_DELAYED_RUNNING | WQ_ATIMER_IMMEDIATE_RUNNING)) ||
-                       (wq->wq_lflags & WQL_ATIMER_BUSY)) {
-               assert_wait((caddr_t)wq, (THREAD_UNINT));
-               workqueue_unlock(wq);
-
-               thread_block(THREAD_CONTINUE_NULL);
-
-               workqueue_lock_spin(wq);
-       }
-
-       /*
-        * Save off pending requests, will complete/free them below after unlocking
-        */
-       TAILQ_HEAD(, threadreq) local_list = TAILQ_HEAD_INITIALIZER(local_list);
-
-       for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
-               TAILQ_CONCAT(&local_list, &wq->wq_overcommit_reqlist[i], tr_entry);
-               TAILQ_CONCAT(&local_list, &wq->wq_reqlist[i], tr_entry);
-       }
-
-       /*
-        * XXX: Can't deferred-cancel the event manager request, so just smash it.
-        */
-       assert((wq->wq_event_manager_threadreq.tr_flags & TR_FLAG_WORKLOOP) == 0);
-       wq->wq_event_manager_threadreq.tr_state = TR_STATE_DEAD;
-
-       workqueue_unlock(wq);
-
-       struct threadreq *tr, *tr_temp;
-       TAILQ_FOREACH_SAFE(tr, &local_list, tr_entry, tr_temp) {
-               _threadreq_cancel(wq, tr);
-       }
-       PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
-}
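
The timer-arming side of the protocol described in the comment above is the interesting half: the RUNNING bits may only be OR'ed in while WQ_EXITING is absent. A lock-free sketch of that arm attempt, with illustrative bit values:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define SK_WQ_EXITING         0x1u  /* illustrative bit assignments */
    #define SK_WQ_ATIMER_RUNNING  0x2u

    /* Atomically set RUNNING unless EXITING has already been set; once
     * _workqueue_mark_exiting() wins, no new timer can arm itself. */
    static bool
    sketch_try_arm_timer(_Atomic uint32_t *wq_flags)
    {
            uint32_t old = atomic_load(wq_flags);
            do {
                    if (old & SK_WQ_EXITING) {
                            return false;   /* exit path won; do not arm */
                    }
            } while (!atomic_compare_exchange_weak(wq_flags, &old,
                            old | SK_WQ_ATIMER_RUNNING));
            return true;
    }
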
-
-/*
- * Routine:    workqueue_exit
- *
- * Function:   clean up the work queue structure(s) now that there are no threads
- *             left running inside the work queue (except possibly current_thread).
- *
- * Conditions: Called by the last thread in the process.
- *             Called against current process.
- */
-void
-_workqueue_exit(struct proc *p)
-{
-       struct workqueue  * wq;
-       struct threadlist  * tl, *tlist;
-       struct uthread  *uth;
-
-       wq = pthread_kern->proc_get_wqptr(p);
-       if (wq != NULL) {
-
-               PTHREAD_TRACE_WQ(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
-
-               pthread_kern->proc_set_wqptr(p, NULL);
-
-               /*
-                * Clean up workqueue data structures for threads that exited and
-                * didn't get a chance to clean up after themselves.
-                */
-               TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
-                       assert((tl->th_flags & TH_LIST_RUNNING) != 0);
-
-                       pthread_kern->thread_sched_call(tl->th_thread, NULL);
-
-                       uth = pthread_kern->get_bsdthread_info(tl->th_thread);
-                       if (uth != (struct uthread *)0) {
-                               pthread_kern->uthread_set_threadlist(uth, NULL);
-                       }
-                       TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
-
-                       /*
-                        * drop our last ref on the thread
-                        */
-                       thread_deallocate(tl->th_thread);
-
-                       zfree(pthread_zone_threadlist, tl);
-               }
-               TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
-                       assert((tl->th_flags & TH_LIST_RUNNING) == 0);
-                       assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
-                       workqueue_removethread(tl, true, false);
-               }
-               TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlemgrlist, th_entry, tlist) {
-                       assert((tl->th_flags & TH_LIST_RUNNING) == 0);
-                       assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
-                       workqueue_removethread(tl, true, false);
-               }
-               if (wq->wq_cached_threadreq) {
-                       zfree(pthread_zone_threadreq, wq->wq_cached_threadreq);
-               }
-               thread_call_free(wq->wq_atimer_delayed_call);
-               thread_call_free(wq->wq_atimer_immediate_call);
-               lck_spin_destroy(&wq->wq_lock, pthread_lck_grp);
-
-               for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
-                       assert(TAILQ_EMPTY(&wq->wq_overcommit_reqlist[i]));
-                       assert(TAILQ_EMPTY(&wq->wq_reqlist[i]));
-               }
-
-               zfree(pthread_zone_workqueue, wq);
-
-               PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
-       }
-}
-
-
-#pragma mark workqueue thread manipulation
-
-
-/**
- * Entry point for libdispatch to ask for threads
- */
-static int
-wqops_queue_reqthreads(struct proc *p, int reqcount,
-               pthread_priority_t priority)
-{
-       bool overcommit = _pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
-       bool event_manager = _pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-       int class = event_manager ? WORKQUEUE_EVENT_MANAGER_BUCKET :
-                       pthread_priority_get_class_index(priority);
-
-       if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS) ||
-                       (overcommit && event_manager)) {
-               return EINVAL;
-       }
-
-       struct workqueue *wq;
-       if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
-               return EINVAL;
-       }
-
-       workqueue_lock_spin(wq);
-       _threadreq_copy_prepare(wq);
-
-       PTHREAD_TRACE_WQ(TRACE_wq_wqops_reqthreads | DBG_FUNC_NONE, wq, reqcount, priority, 0, 0);
-
-       int tr_flags = 0;
-       if (overcommit) tr_flags |= TR_FLAG_OVERCOMMIT;
-       if (reqcount > 1) {
-               /*
-                * When libdispatch asks for more than one thread, it wants to achieve
-                * parallelism.  Pacing would be detrimental to that goal, so treat
-                * these requests specially and skip the pacing admission check.
-                */
-               tr_flags |= TR_FLAG_NO_PACING;
-       }
-
-       while (reqcount-- && !_wq_exiting(wq)) {
-               struct threadreq req;
-               _threadreq_init_stack(&req, class, tr_flags);
-
-               workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, true);
-
-               workqueue_lock_spin(wq); /* reacquire */
-               _threadreq_copy_prepare(wq);
-       }
-
-       workqueue_unlock(wq);
-
-       return 0;
-}
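
For orientation, this is roughly how the userspace side reaches wqops_queue_reqthreads(); both the trap wrapper's prototype and the WQOPS constant's value are assumptions here, not taken from this diff:

    #include <stddef.h>

    /* Hypothetical userspace sketch of a libdispatch-style thread request. */
    extern int __workq_kernreturn(int options, void *item, int arg2, int arg3);

    #define SK_WQOPS_QUEUE_REQTHREADS 0x20  /* illustrative value */

    static int
    sketch_request_threads(int nthreads, int pthread_priority)
    {
            /* arg2 = number of threads to start, arg3 = priority, matching
             * the WQOPS_QUEUE_REQTHREADS case in _workq_kernreturn() */
            return __workq_kernreturn(SK_WQOPS_QUEUE_REQTHREADS, NULL,
                            nthreads, pthread_priority);
    }
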
-
-/*
- * Used by the kevent system to request threads.
- *
- * Currently count is ignored and we always return one thread per invocation.
- */
-static thread_t
-_workq_kevent_reqthreads(struct proc *p, pthread_priority_t priority,
-               bool no_emergency)
-{
-       int wq_run_tr = WQ_RUN_TR_THROTTLED;
-       bool emergency_thread = false;
-       struct threadreq req;
-
-
-       struct workqueue *wq;
-       if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
-               return THREAD_NULL;
-       }
-
-       int class = pthread_priority_get_class_index(priority);
-
-       workqueue_lock_spin(wq);
-       bool has_threadreq = _threadreq_copy_prepare_noblock(wq);
-
-       PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, NULL, priority, 0, 0);
-
-       /*
-        * Skip straight to event manager if that's what was requested
-        */
-       if ((_pthread_priority_get_qos_newest(priority) == QOS_CLASS_UNSPECIFIED) ||
-                       (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)){
-               goto event_manager;
-       }
-
-       bool will_pace = _wq_should_pace_priority(wq, class);
-       if ((wq->wq_thidlecount == 0 || will_pace) && has_threadreq == false) {
-               /*
-                * We'll need to persist the request and can't, so return the emergency
-                * thread instead, which has a persistent request object.
-                */
-               emergency_thread = true;
-               goto event_manager;
-       }
-
-       /*
-        * Handle overcommit requests
-        */
-       if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){
-               _threadreq_init_stack(&req, class, TR_FLAG_KEVENT | TR_FLAG_OVERCOMMIT);
-               wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false);
-               goto done;
-       }
-
-       /*
-        * Handle constrained requests
-        */
-       boolean_t may_start = may_start_constrained_thread(wq, class, NULL, false);
-       if (may_start || no_emergency) {
-               _threadreq_init_stack(&req, class, TR_FLAG_KEVENT);
-               wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false);
-               goto done;
-       } else {
-               emergency_thread = true;
-       }
-
-
-event_manager:
-       _threadreq_init_stack(&req, WORKQUEUE_EVENT_MANAGER_BUCKET, TR_FLAG_KEVENT);
-       wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false);
-
-done:
-       if (wq_run_tr == WQ_RUN_TR_THREAD_NEEDED && WQ_TIMER_IMMEDIATE_NEEDED(wq)) {
-               workqueue_interval_timer_trigger(wq);
-       }
-       return emergency_thread ? (thread_t)-1 : THREAD_NULL;
-}
-
-thread_t
-_workq_reqthreads(struct proc *p, __assert_only int requests_count,
-               workq_reqthreads_req_t request)
-{
-       assert(requests_count == 1);
-
-       pthread_priority_t priority = request->priority;
-       bool no_emergency = request->count & WORKQ_REQTHREADS_NOEMERGENCY;
-
-       return _workq_kevent_reqthreads(p, priority, no_emergency);
-}
-
-
-int
-workq_kern_threadreq(struct proc *p, workq_threadreq_t _req,
-               enum workq_threadreq_type type, unsigned long priority, int flags)
-{
-       struct workqueue *wq;
-       int ret;
-
-       if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
-               return EINVAL;
-       }
-
-       switch (type) {
-       case WORKQ_THREADREQ_KEVENT: {
-               bool no_emergency = flags & WORKQ_THREADREQ_FLAG_NOEMERGENCY;
-               (void)_workq_kevent_reqthreads(p, priority, no_emergency);
-               return 0;
-       }
-       case WORKQ_THREADREQ_WORKLOOP:
-       case WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL: {
-               struct threadreq *req = (struct threadreq *)_req;
-               int req_class = pthread_priority_get_class_index(priority);
-               int req_flags = TR_FLAG_WORKLOOP;
-               if ((_pthread_priority_get_flags(priority) &
-                               _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){
-                       req_flags |= TR_FLAG_OVERCOMMIT;
-               }
-
-               thread_t thread = current_thread();
-               struct threadlist *tl = util_get_thread_threadlist_entry(thread);
-
-               if (tl && tl != WQ_THREADLIST_EXITING_POISON &&
-                               (tl->th_flags & TH_LIST_UNBINDING)) {
-                       /*
-                        * We're called back synchronously from the context of
-                        * kevent_qos_internal_unbind(), from within
-                        * wqops_thread_return(), so we can try to match up this
-                        * thread with this request!
-                        */
-               } else {
-                       tl = NULL;
-               }
-
-               _threadreq_init_alloced(req, req_class, req_flags);
-               workqueue_lock_spin(wq);
-               PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, priority, 1, 0);
-               ret = workqueue_run_threadreq_and_unlock(p, wq, tl, req, false);
-               if (ret == WQ_RUN_TR_EXITING) {
-                       return ECANCELED;
-               }
-               if (ret == WQ_RUN_TR_THREAD_NEEDED) {
-                       if (type == WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL) {
-                               return EAGAIN;
-                       }
-                       if (WQ_TIMER_IMMEDIATE_NEEDED(wq)) {
-                               workqueue_interval_timer_trigger(wq);
-                       }
-               }
-               return 0;
-       }
-       case WORKQ_THREADREQ_REDRIVE:
-               PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, 0, 0, 4, 0);
-               workqueue_lock_spin(wq);
-               ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
-               if (ret == WQ_RUN_TR_EXITING) {
-                       return ECANCELED;
-               }
-               return 0;
-       default:
-               return ENOTSUP;
-       }
-}
-
-int
-workq_kern_threadreq_modify(struct proc *p, workq_threadreq_t _req,
-               enum workq_threadreq_op operation, unsigned long arg1,
-               unsigned long __unused arg2)
-{
-       struct threadreq *req = (struct threadreq *)_req;
-       struct workqueue *wq;
-       int priclass, ret = 0, wq_tr_rc = WQ_RUN_TR_THROTTLED;
-
-       if (req == NULL || (wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
-               return EINVAL;
-       }
-
-       workqueue_lock_spin(wq);
-
-       if (_wq_exiting(wq)) {
-               ret = ECANCELED;
-               goto out_unlock;
-       }
-
-       /*
-        * Find/validate the referenced request structure
-        */
-       if (req->tr_state != TR_STATE_WAITING) {
-               ret = EINVAL;
-               goto out_unlock;
-       }
-       assert(req->tr_priority < WORKQUEUE_EVENT_MANAGER_BUCKET);
-       assert(req->tr_flags & TR_FLAG_WORKLOOP);
-
-       switch (operation) {
-       case WORKQ_THREADREQ_CHANGE_PRI:
-       case WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL:
-               priclass = pthread_priority_get_class_index(arg1);
-               PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, arg1, 2, 0);
-               if (req->tr_priority == priclass) {
-                       goto out_unlock;
-               }
-               _threadreq_dequeue(wq, req);
-               req->tr_priority = priclass;
-               req->tr_state = TR_STATE_NEW; // what was old is new again
-               wq_tr_rc = workqueue_run_threadreq_and_unlock(p, wq, NULL, req, false);
-               goto out;
-
-       case WORKQ_THREADREQ_CANCEL:
-               PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, 0, 3, 0);
-               _threadreq_dequeue(wq, req);
-               req->tr_state = TR_STATE_DEAD;
-               break;
-
-       default:
-               ret = ENOTSUP;
-               break;
-       }
-
-out_unlock:
-       workqueue_unlock(wq);
-out:
-       if (wq_tr_rc == WQ_RUN_TR_THREAD_NEEDED) {
-               if (operation == WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL) {
-                       ret = EAGAIN;
-               } else if (WQ_TIMER_IMMEDIATE_NEEDED(wq)) {
-                       workqueue_interval_timer_trigger(wq);
-               }
-       }
-       return ret;
-}
-
-
-static int
-wqops_thread_return(struct proc *p, struct workqueue *wq)
-{
-       thread_t th = current_thread();
-       struct uthread *uth = pthread_kern->get_bsdthread_info(th);
-       struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
-
-       /* reset signal mask on the workqueue thread to default state */
-       if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) {
-               pthread_kern->proc_lock(p);
-               pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask);
-               pthread_kern->proc_unlock(p);
-       }
-
-       if (wq == NULL || !tl) {
-               return EINVAL;
-       }
-
-       PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_START, tl->th_workq, 0, 0, 0, 0);
-
-       /*
-        * This squash call has neat semantics: it removes the specified overrides,
-        * replacing the current requested QoS with the previous effective QoS from
-        * those overrides.  This means we won't be preempted due to having our QoS
-        * lowered.  Of course, now our understanding of the thread's QoS is wrong,
-        * so we'll adjust below.
-        */
-       bool was_manager = (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
-       int new_qos;
-
-       if (!was_manager) {
-               new_qos = pthread_kern->proc_usynch_thread_qos_squash_override_for_resource(th,
-                               THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD,
-                               THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
-       }
-
-       PTHREAD_TRACE_WQ(TRACE_wq_runitem | DBG_FUNC_END, wq, tl->th_priority, 0, 0, 0);
-
-       workqueue_lock_spin(wq);
-
-       if (tl->th_flags & TH_LIST_KEVENT_BOUND) {
-               unsigned int flags = KEVENT_FLAG_WORKQ;
-               if (was_manager) {
-                       flags |= KEVENT_FLAG_WORKQ_MANAGER;
-               }
-
-               tl->th_flags |= TH_LIST_UNBINDING;
-               workqueue_unlock(wq);
-               kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, flags);
-               if (!(tl->th_flags & TH_LIST_UNBINDING)) {
-                       _setup_wqthread(p, th, wq, tl, WQ_SETUP_CLEAR_VOUCHER);
-                       pthread_kern->unix_syscall_return(EJUSTRETURN);
-                       __builtin_unreachable();
-               }
-               workqueue_lock_spin(wq);
-               tl->th_flags &= ~(TH_LIST_KEVENT_BOUND | TH_LIST_UNBINDING);
-       }
-
-       if (!was_manager) {
-               /* Fix up counters from the squash operation. */
-               uint8_t old_bucket = tl->th_priority;
-               uint8_t new_bucket = thread_qos_get_class_index(new_qos);
-
-               if (old_bucket != new_bucket) {
-                       _wq_thactive_move(wq, old_bucket, new_bucket);
-                       wq->wq_thscheduled_count[old_bucket]--;
-                       wq->wq_thscheduled_count[new_bucket]++;
-
-                       PTHREAD_TRACE_WQ(TRACE_wq_thread_squash | DBG_FUNC_NONE, wq, tl->th_priority, new_bucket, 0, 0);
-                       tl->th_priority = new_bucket;
-                       PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_END, tl->th_workq, new_qos, 0, 0, 0);
-               }
-       }
-
-       workqueue_run_threadreq_and_unlock(p, wq, tl, NULL, false);
-       return 0;
-}
-
-/**
- * Multiplexed call to interact with the workqueue mechanism
- */
-int
-_workq_kernreturn(struct proc *p,
-                 int options,
-                 user_addr_t item,
-                 int arg2,
-                 int arg3,
-                 int32_t *retval)
-{
-       struct workqueue *wq;
-       int error = 0;
-
-       if (pthread_kern->proc_get_register(p) == 0) {
-               return EINVAL;
-       }
-
-       switch (options) {
-       case WQOPS_QUEUE_NEWSPISUPP: {
-               /*
-                * arg2 = offset of serialno into dispatch queue
-                * arg3 = kevent support
-                */
-               int offset = arg2;
-               if (arg3 & 0x01){
-                       // If we get here, then userspace has indicated support for kevent delivery.
-               }
-
-               pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset);
-               break;
-       }
-       case WQOPS_QUEUE_REQTHREADS: {
-               /*
-                * arg2 = number of threads to start
-                * arg3 = priority
-                */
-               error = wqops_queue_reqthreads(p, arg2, arg3);
-               break;
-       }
-       case WQOPS_SET_EVENT_MANAGER_PRIORITY: {
-               /*
-                * arg2 = priority for the manager thread
-                *
-                * if _PTHREAD_PRIORITY_SCHED_PRI_FLAG is set, the bits outside
-                * _PTHREAD_PRIORITY_FLAGS_MASK contain a scheduling priority
-                * instead of a QoS value
-                */
-               pthread_priority_t pri = arg2;
-
-               wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
-               if (wq == NULL) {
-                       error = EINVAL;
-                       break;
-               }
-               workqueue_lock_spin(wq);
-               if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){
-                       /*
-                        * If userspace passes a scheduling priority, that takes precedence
-                        * over any QoS.  (So, userspace should take care not to accidentally
-                        * lower the priority this way.)
-                        */
-                       uint32_t sched_pri = pri & _PTHREAD_PRIORITY_SCHED_PRI_MASK;
-                       if (wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){
-                               wq->wq_event_manager_priority = MAX(sched_pri, wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_MASK)
-                                               | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-                       } else {
-                               wq->wq_event_manager_priority = sched_pri
-                                               | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-                       }
-               } else if ((wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
-                       int cur_qos = pthread_priority_get_thread_qos(wq->wq_event_manager_priority);
-                       int new_qos = pthread_priority_get_thread_qos(pri);
-                       wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(MAX(cur_qos, new_qos)) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-               }
-               workqueue_unlock(wq);
-               break;
-       }
-       case WQOPS_THREAD_KEVENT_RETURN:
-       case WQOPS_THREAD_WORKLOOP_RETURN:
-               wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
-               PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, options, 0, 0, 0);
-               if (item != 0 && arg2 != 0) {
-                       int32_t kevent_retval;
-                       int ret;
-                       if (options == WQOPS_THREAD_KEVENT_RETURN) {
-                               ret = kevent_qos_internal(p, -1, item, arg2, item, arg2, NULL, NULL,
-                                               KEVENT_FLAG_WORKQ | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS,
-                                               &kevent_retval);
-                       } else /* options == WQOPS_THREAD_WORKLOOP_RETURN */ {
-                               kqueue_id_t kevent_id = -1;
-                               ret = kevent_id_internal(p, &kevent_id, item, arg2, item, arg2,
-                                               NULL, NULL,
-                                               KEVENT_FLAG_WORKLOOP | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS,
-                                               &kevent_retval);
-                       }
-                       /*
-                        * We shouldn't be getting more errors out than events we put in, so
-                        * reusing the input buffer should always provide enough space.  But,
-                        * the assert is commented out since we get errors in edge cases in the
-                        * process lifecycle.
-                        */
-                       //assert(ret == KERN_SUCCESS && kevent_retval >= 0);
-                       if (ret != KERN_SUCCESS){
-                               error = ret;
-                               break;
-                       } else if (kevent_retval > 0){
-                               assert(kevent_retval <= arg2);
-                               *retval = kevent_retval;
-                               error = 0;
-                               break;
-                       }
-               }
-               goto thread_return;
-
-       case WQOPS_THREAD_RETURN:
-               wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
-               PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, options, 0, 0, 0);
-       thread_return:
-               error = wqops_thread_return(p, wq);
-               // NOT REACHED except in case of error
-               assert(error);
-               break;
-
-       case WQOPS_SHOULD_NARROW: {
-               /*
-                * arg2 = priority to test
-                * arg3 = unused
-                */
-               pthread_priority_t priority = arg2;
-               thread_t th = current_thread();
-               struct threadlist *tl = util_get_thread_threadlist_entry(th);
-
-               if (tl == NULL || (tl->th_flags & TH_LIST_CONSTRAINED) == 0) {
-                       error = EINVAL;
-                       break;
-               }
-
-               int class = pthread_priority_get_class_index(priority);
-               wq = tl->th_workq;
-               workqueue_lock_spin(wq);
-               bool should_narrow = !may_start_constrained_thread(wq, class, tl, false);
-               workqueue_unlock(wq);
-
-               *retval = should_narrow;
-               break;
-       }
-       default:
-               error = EINVAL;
-               break;
-       }
-
-       switch (options) {
-       case WQOPS_THREAD_KEVENT_RETURN:
-       case WQOPS_THREAD_WORKLOOP_RETURN:
-       case WQOPS_THREAD_RETURN:
-               PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, options, 0, 0, 0);
-               break;
-       }
-       return (error);
-}
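
The WQOPS_SET_EVENT_MANAGER_PRIORITY case above juggles two encodings of the same 32-bit word. A decoding sketch, with made-up mask values standing in for the real ones in qos_private.h:

    #include <stdint.h>

    #define SK_SCHED_PRI_FLAG 0x20000000u  /* illustrative flag/mask values */
    #define SK_SCHED_PRI_MASK 0x0000ffffu
    #define SK_FLAGS_MASK     0xff000000u

    /* A pthread_priority_t carries either a raw Mach scheduling priority
     * (when the flag is set, taking precedence over QoS) or a QoS encoding. */
    static void
    sketch_decode_manager_pri(uint32_t pri,
                    uint32_t *sched_pri_out, uint32_t *qos_bits_out)
    {
            if (pri & SK_SCHED_PRI_FLAG) {
                    *sched_pri_out = pri & SK_SCHED_PRI_MASK;
                    *qos_bits_out  = 0;
            } else {
                    *sched_pri_out = 0;
                    *qos_bits_out  = pri & ~SK_FLAGS_MASK;
            }
    }
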
-
-/*
- * We have no work to do, park ourselves on the idle list.
- *
- * Consumes the workqueue lock and does not return.
- */
-static void __dead2
-parkit(struct workqueue *wq, struct threadlist *tl, thread_t thread)
-{
-       assert(thread == tl->th_thread);
-       assert(thread == current_thread());
-
-       PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_START, wq, 0, 0, 0, 0);
-
-       uint32_t us_to_wait = 0;
-
-       TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
-
-       tl->th_flags &= ~TH_LIST_RUNNING;
-       tl->th_flags &= ~TH_LIST_KEVENT;
-       assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
-
-       if (tl->th_flags & TH_LIST_CONSTRAINED) {
-               wq->wq_constrained_threads_scheduled--;
-               tl->th_flags &= ~TH_LIST_CONSTRAINED;
-       }
-
-       _wq_thactive_dec(wq, tl->th_priority);
-       wq->wq_thscheduled_count[tl->th_priority]--;
-       wq->wq_threads_scheduled--;
-       uint32_t thidlecount = ++wq->wq_thidlecount;
-
-       pthread_kern->thread_sched_call(thread, NULL);
-
-       /*
-        * We'd like to always have one manager thread parked so that we can have
-        * low latency when we need to bring a manager thread up.  If that idle
-        * thread list is empty, make this thread a manager thread.
-        *
-        * XXX: This doesn't check that there's not a manager thread outstanding,
-        * so it's based on the assumption that most manager callouts will change
-        * their QoS before parking.  If that stops being true, this may end up
-        * costing us more than we gain.
-        */
-       if (TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
-                       tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET){
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE,
-                                       wq, thread_tid(thread),
-                                       (tl->th_priority << 16) | WORKQUEUE_EVENT_MANAGER_BUCKET, 2, 0);
-               reset_priority(tl, pthread_priority_from_wq_class_index(wq, WORKQUEUE_EVENT_MANAGER_BUCKET));
-               tl->th_priority = WORKQUEUE_EVENT_MANAGER_BUCKET;
-       }
-
-       if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){
-               TAILQ_INSERT_HEAD(&wq->wq_thidlemgrlist, tl, th_entry);
-       } else {
-               TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);
-       }
-
-       /*
-        * When we remove the voucher from the thread, we may lose our importance
-        * causing us to get preempted, so we do this after putting the thread on
-        * the idle list.  That way, when we get our importance back, we'll be
-        * able to use this thread from e.g. the kevent callout to deliver a boosting
-        * message.
-        */
-       tl->th_flags |= TH_LIST_REMOVING_VOUCHER;
-       workqueue_unlock(wq);
-       if (pthread_kern->thread_will_park_or_terminate) {
-               pthread_kern->thread_will_park_or_terminate(tl->th_thread);
-       }
-       __assert_only kern_return_t kr;
-       kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
-       assert(kr == KERN_SUCCESS);
-       workqueue_lock_spin(wq);
-       tl->th_flags &= ~(TH_LIST_REMOVING_VOUCHER);
-
-       if ((tl->th_flags & TH_LIST_RUNNING) == 0) {
-               if (thidlecount < 101) {
-                       us_to_wait = wq_reduce_pool_window_usecs - ((thidlecount-2) * (wq_reduce_pool_window_usecs / 100));
-               } else {
-                       us_to_wait = wq_reduce_pool_window_usecs / 100;
-               }
-
-               thread_set_pending_block_hint(thread, kThreadWaitParkedWorkQueue);
-               assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE),
-                               TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait,
-                               wq_reduce_pool_window_usecs/10, NSEC_PER_USEC);
-
-               workqueue_unlock(wq);
-
-               thread_block(wq_unpark_continue);
-               panic("thread_block(wq_unpark_continue) returned!");
-       } else {
-               workqueue_unlock(wq);
-
-               /*
-                * While we'd dropped the lock to unset our voucher, someone came
-                * around and made us runnable.  But because we weren't waiting on the
-                * event their wakeup() was ineffectual.  To correct for that, we just
-                * run the continuation ourselves.
-                */
-               wq_unpark_continue(NULL, THREAD_AWAKENED);
-       }
-}
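
The park timeout near the end of parkit() implements the idle-pool decay. A sketch of the same arithmetic, assuming the conventional 5-second wq_reduce_pool_window_usecs default:

    #include <stdint.h>

    #define SK_REDUCE_POOL_WINDOW_USECS (5u * 1000u * 1000u)  /* assumed default */

    /* Two idle threads wait the full 5s window; each additional idle thread
     * waits 1% (50ms) less, and from 101 idle threads on the wait is a flat
     * 50ms, so an oversized pool drains quickly. */
    static uint32_t
    sketch_park_timeout_usecs(uint32_t thidlecount)
    {
            if (thidlecount < 101) {
                    return SK_REDUCE_POOL_WINDOW_USECS -
                            ((thidlecount - 2) *
                             (SK_REDUCE_POOL_WINDOW_USECS / 100));
            }
            return SK_REDUCE_POOL_WINDOW_USECS / 100;
    }
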
-
-static bool
-may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass,
-               struct threadlist *tl, bool may_start_timer)
-{
-       uint32_t req_qos = _wq_thactive_best_constrained_req_qos(wq);
-       wq_thactive_t thactive;
-
-       if (may_start_timer && at_priclass < req_qos) {
-               /*
-                * When called from workqueue_run_threadreq_and_unlock(), pre-post the
-                * new, higher priority into the thactive state so that
-                * workqueue_callback() makes the right decision.
-                *
-                * If the admission check passes, workqueue_run_threadreq_and_unlock
-                * will reset this value before running the request.
-                */
-               thactive = _wq_thactive_set_best_constrained_req_qos(wq, req_qos,
-                               at_priclass);
-#ifdef __LP64__
-               PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 1, (uint64_t)thactive,
-                               (uint64_t)(thactive >> 64), 0, 0);
-#endif
-       } else {
-               thactive = _wq_thactive(wq);
-       }
-
-       uint32_t constrained_threads = wq->wq_constrained_threads_scheduled;
-       if (tl && (tl->th_flags & TH_LIST_CONSTRAINED)) {
-               /*
-                * don't count the current thread as scheduled
-                */
-               constrained_threads--;
-       }
-       if (constrained_threads >= wq_max_constrained_threads) {
-               PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 1,
-                               wq->wq_constrained_threads_scheduled,
-                               wq_max_constrained_threads, 0);
-               /*
-                * we need 1 or more constrained threads to return to the kernel before
-                * we can dispatch additional work
-                */
-               return false;
-       }
-
-       /*
-        * Compute a metric for how many threads are active.  We find the
-        * highest priority request outstanding and then add up the number of
-        * active threads in that and all higher-priority buckets.  We'll also add
-        * any "busy" threads which are not active but blocked recently enough that
-        * we can't be sure they've gone idle yet.  We'll then compare this metric
-        * to our max concurrency to decide whether to add a new thread.
-        */
-
-       uint32_t busycount, thactive_count;
-
-       thactive_count = _wq_thactive_aggregate_downto_qos(wq, thactive,
-                       at_priclass, &busycount, NULL);
-
-       if (tl && tl->th_priority <= at_priclass) {
-               /*
-                * don't count this thread as currently active
-                */
-               assert(thactive_count > 0);
-               thactive_count--;
-       }
-
-       if (thactive_count + busycount < wq_max_concurrency[at_priclass]) {
-               PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 2,
-                               thactive_count, busycount, 0);
-               return true;
-       } else {
-               PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 3,
-                               thactive_count, busycount, 0);
-       }
-
-       if (busycount && may_start_timer) {
-               /*
-                * If this is called from the add timer, we won't have another timer
-                * fire when the thread exits the "busy" state, so rearm the timer.
-                */
-               if (WQ_TIMER_DELAYED_NEEDED(wq)) {
-                       workqueue_interval_timer_start(wq);
-               }
-       }
-
-       return false;
-}
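
A worked instance of the admission arithmetic above, reusing the NCPU = 4 example from the redrive comment in workqueue_callback(); wq_max_concurrency[qos] == 4 is an assumption of the example:

    #include <stdbool.h>
    #include <stdint.h>

    /* Admit a constrained thread only while active plus recently-busy
     * threads at or above this QoS stay under the concurrency limit. */
    static bool
    sketch_may_start(uint32_t thactive_count, uint32_t busycount)
    {
            const uint32_t sk_max_concurrency = 4;  /* assumed, NCPU = 4 */

            /* 3 active, 0 busy -> 3 < 4: admit
             * 3 active, 1 busy -> 4 < 4 fails: refuse (and let the add
             * timer rearm, since the busy thread may still go idle) */
            return thactive_count + busycount < sk_max_concurrency;
    }
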
-
-static struct threadlist *
-pop_from_thidlelist(struct workqueue *wq, uint32_t priclass)
-{
-       assert(wq->wq_thidlecount);
-
-       struct threadlist *tl = NULL;
-
-       if (!TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
-                       (priclass == WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlelist))){
-               tl = TAILQ_FIRST(&wq->wq_thidlemgrlist);
-               TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
-               assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
-       } else if (!TAILQ_EMPTY(&wq->wq_thidlelist) &&
-                       (priclass != WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlemgrlist))){
-               tl = TAILQ_FIRST(&wq->wq_thidlelist);
-               TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
-               assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
-       } else {
-               panic("pop_from_thidlelist called with no threads available");
-       }
-       assert((tl->th_flags & TH_LIST_RUNNING) == 0);
-
-       assert(wq->wq_thidlecount);
-       wq->wq_thidlecount--;
-
-       TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);
-
-       tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;
-
-       wq->wq_threads_scheduled++;
-       wq->wq_thscheduled_count[priclass]++;
-       _wq_thactive_inc(wq, priclass);
-       return tl;
-}
-
-static pthread_priority_t
-pthread_priority_from_wq_class_index(struct workqueue *wq, int index)
-{
-       if (index == WORKQUEUE_EVENT_MANAGER_BUCKET){
-               return wq->wq_event_manager_priority;
-       } else {
-               return class_index_get_pthread_priority(index);
-       }
-}
-
-static void
-reset_priority(struct threadlist *tl, pthread_priority_t pri)
-{
-       kern_return_t ret;
-       thread_t th = tl->th_thread;
-
-       if ((pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
-               ret = pthread_kern->thread_set_workq_qos(th, pthread_priority_get_thread_qos(pri), 0);
-               assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
-
-               if (tl->th_flags & TH_LIST_EVENT_MGR_SCHED_PRI) {
-
-                       /* Reset priority to default (masked by QoS) */
-
-                       ret = pthread_kern->thread_set_workq_pri(th, 31, POLICY_TIMESHARE);
-                       assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
-
-                       tl->th_flags &= ~TH_LIST_EVENT_MGR_SCHED_PRI;
-               }
-       } else {
-               ret = pthread_kern->thread_set_workq_qos(th, THREAD_QOS_UNSPECIFIED, 0);
-               assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
-               ret = pthread_kern->thread_set_workq_pri(th, (pri & (~_PTHREAD_PRIORITY_FLAGS_MASK)), POLICY_TIMESHARE);
-               assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
-
-               tl->th_flags |= TH_LIST_EVENT_MGR_SCHED_PRI;
-       }
-}
-
-/*
- * Picks the best request to run, and returns the best overcommit fallback
- * if the best pick is non overcommit and risks failing its admission check.
- */
-static struct threadreq *
-workqueue_best_threadreqs(struct workqueue *wq, struct threadlist *tl,
-               struct threadreq **fallback)
-{
-       struct threadreq *req, *best_req = NULL;
-       int priclass, prilimit;
-
-       if ((wq->wq_event_manager_threadreq.tr_state == TR_STATE_WAITING) &&
-                       ((wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0) ||
-                       (tl && tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) {
-               /*
-                * There's an event manager request and either:
-                *   - no event manager currently running
-                *   - we are re-using the event manager
-                */
-               req = &wq->wq_event_manager_threadreq;
-               PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, req, 1, 0, 0);
-               return req;
-       }
-
-       if (tl) {
-               prilimit = WORKQUEUE_EVENT_MANAGER_BUCKET;
-       } else {
-               prilimit = _wq_highest_paced_priority(wq);
-       }
-       for (priclass = 0; priclass < prilimit; priclass++) {
-               req = TAILQ_FIRST(&wq->wq_overcommit_reqlist[priclass]);
-               if (req) {
-                       PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, req, 2, 0, 0);
-                       if (best_req) {
-                               *fallback = req;
-                       } else {
-                               best_req = req;
-                       }
-                       break;
-               }
-               if (!best_req) {
-                       best_req = TAILQ_FIRST(&wq->wq_reqlist[priclass]);
-                       if (best_req) {
-                               PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, best_req, 3, 0, 0);
-                       }
-               }
-       }
-       return best_req;
-}
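
Before the dispatcher below, it may help to see the thread request lifecycle its assertions imply, collected as an illustrative enum (the real definitions live in the workqueue headers; PENDING follows the wording of the doc comment that follows):

    /* Illustrative lifecycle of a struct threadreq:
     *
     *   NEW ----> WAITING ----> PENDING (picked, bound to a threadlist)
     *    |           |
     *    |           +--------> DEAD (cancelled, or smashed during exit)
     *    +--- may also run immediately, without ever being enqueued
     */
    enum sketch_tr_state {
            SK_TR_STATE_NEW,      /* just initialized by a caller */
            SK_TR_STATE_WAITING,  /* enqueued on a wq_reqlist bucket */
            SK_TR_STATE_PENDING,  /* selected and attached to a thread */
            SK_TR_STATE_DEAD,     /* cancelled */
    };
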
-
-/**
- * Runs a thread request on a thread
- *
- * - if thread is THREAD_NULL, will find a thread and run the request there.
- *   Otherwise, the thread must be the current thread.
- *
- * - if req is NULL, will find the highest priority request and run that.  If
- *   it is not NULL, it must be a threadreq object in state NEW.  If it can not
- *   be run immediately, it will be enqueued and moved to state WAITING.
- *
- *   Either way, the thread request object serviced will be moved to state
- *   PENDING and attached to the threadlist.
- *
- *   Should be called with the workqueue lock held.  Will drop it.
- *
- *   WARNING: _workq_kevent_reqthreads needs to be able to preflight any
- *   admission checks in this function.  If you are changing this function,
- *   keep that one up-to-date.
- *
- * - if parking_tl is non NULL, then the current thread is parking. This will
- *   try to reuse this thread for a request. If no match is found, it will be
- *   parked.
- */
-static int
-workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq,
-               struct threadlist *parking_tl, struct threadreq *req,
-               bool may_add_new_thread)
-{
-       struct threadreq *incoming_req = req;
-
-       struct threadlist *tl = parking_tl;
-       int rc = WQ_RUN_TR_THROTTLED;
-
-       assert(tl == NULL || tl->th_thread == current_thread());
-       assert(req == NULL || req->tr_state == TR_STATE_NEW);
-       assert(!may_add_new_thread || !tl);
-
-       PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq | DBG_FUNC_START, wq, req,
-                       tl ? thread_tid(tl->th_thread) : 0,
-                       req ? (req->tr_priority << 16 | req->tr_flags) : 0, 0);
-
-       /*
-        * Special cases when provided an event manager request
-        */
-       if (req && req->tr_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-               // Clients must not rely on identity of event manager requests
-               assert(req->tr_flags & TR_FLAG_ONSTACK);
-               // You can't be both overcommit and event manager
-               assert((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0);
-
-               /*
-                * We can only ever have one event manager request, so coalesce them if
-                * there's already one outstanding.
-                */
-               if (wq->wq_event_manager_threadreq.tr_state == TR_STATE_WAITING) {
-                       PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_mgr_merge | DBG_FUNC_NONE, wq, req, 0, 0, 0);
-
-                       struct threadreq *existing_req = &wq->wq_event_manager_threadreq;
-                       if (req->tr_flags & TR_FLAG_KEVENT) {
-                               existing_req->tr_flags |= TR_FLAG_KEVENT;
-                       }
-
-                       req = existing_req;
-                       incoming_req = NULL;
-               }
-
-               if (wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] &&
-                               (!tl || tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET)){
-                       /*
-                        * There can only be one event manager running at a time.
-                        */
-                       PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 1, 0, 0, 0);
-                       goto done;
-               }
-       }
-
-again: // Start again after creating a thread
-
-       if (_wq_exiting(wq)) {
-               rc = WQ_RUN_TR_EXITING;
-               goto exiting;
-       }
-
-       /*
-        * Thread request selection and admission control
-        */
-       struct threadreq *fallback = NULL;
-       if (req) {
-               if ((req->tr_flags & TR_FLAG_NO_PACING) == 0 &&
-                               _wq_should_pace_priority(wq, req->tr_priority)) {
-                       /*
-                        * If a request fails the pacing admission check, then thread
-                        * requests are redriven when the pacing thread is finally scheduled
-                        * when it calls _wq_pacing_end() in wq_unpark_continue().
-                        */
-                       goto done;
-               }
-       } else if (wq->wq_reqcount == 0) {
-               PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 2, 0, 0, 0);
-               goto done;
-       } else if ((req = workqueue_best_threadreqs(wq, tl, &fallback)) == NULL) {
-               PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 3, 0, 0, 0);
-               goto done;
-       }
-
-       if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0 &&
-                       (req->tr_priority < WORKQUEUE_EVENT_MANAGER_BUCKET)) {
-               if (!may_start_constrained_thread(wq, req->tr_priority, parking_tl, true)) {
-                       if (!fallback) {
-                               PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 4, 0, 0, 0);
-                               goto done;
-                       }
-                       assert(req->tr_state == TR_STATE_WAITING);
-                       req = fallback;
-               }
-       }
-
-       /*
-        * Thread selection.
-        */
-       if (parking_tl) {
-               if (tl->th_priority != req->tr_priority) {
-                       _wq_thactive_move(wq, tl->th_priority, req->tr_priority);
-                       wq->wq_thscheduled_count[tl->th_priority]--;
-                       wq->wq_thscheduled_count[req->tr_priority]++;
-               }
-               PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE,
-                               wq, 1, thread_tid(tl->th_thread), 0, 0);
-       } else if (wq->wq_thidlecount) {
-               tl = pop_from_thidlelist(wq, req->tr_priority);
-               /*
-                * This call will update wq_thscheduled_count and wq_thactive_count for
-                * the provided priority.  It will not set the returned thread to that
-                * priority.  This matches the behavior of the parking_tl clause above.
-                */
-               PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE,
-                               wq, 2, thread_tid(tl->th_thread), 0, 0);
-       } else /* no idle threads */ {
-               if (!may_add_new_thread || wq->wq_nthreads >= wq_max_threads) {
-                       PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 5,
-                                       may_add_new_thread, wq->wq_nthreads, 0);
-                       if (wq->wq_nthreads < wq_max_threads) {
-                               rc = WQ_RUN_TR_THREAD_NEEDED;
-                       }
-                       goto done;
-               }
-
-               bool added_thread = workqueue_addnewthread(p, wq);
-               /*
-                * workqueue_addnewthread will drop and re-take the lock, so we
-                * need to ensure we still have a cached request.
-                *
-                * It also means we have to pick a new request, since our old pick may
-                * not be valid anymore.
-                */
-               req = incoming_req;
-               if (req && (req->tr_flags & TR_FLAG_ONSTACK)) {
-                       _threadreq_copy_prepare(wq);
-               }
-
-               if (added_thread) {
-                       PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE,
-                                       wq, 3, 0, 0, 0);
-                       goto again;
-               } else if (_wq_exiting(wq)) {
-                       rc = WQ_RUN_TR_EXITING;
-                       goto exiting;
-               } else {
-                       PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 6, 0, 0, 0);
-                       /*
-                        * Something caused thread creation to fail.  Kick off the timer in
-                        * the hope that it'll succeed next time.
-                        */
-                       if (WQ_TIMER_DELAYED_NEEDED(wq)) {
-                               workqueue_interval_timer_start(wq);
-                       }
-                       goto done;
-               }
-       }
-
-       /*
-        * Set up the thread, mark the request as complete, and run with it.
-        */
-       if (req->tr_state == TR_STATE_WAITING) {
-               _threadreq_dequeue(wq, req);
-       }
-       if (tl->th_priority != req->tr_priority) {
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE,
-                                       wq, thread_tid(tl->th_thread),
-                                       (tl->th_priority << 16) | req->tr_priority, 1, 0);
-               reset_priority(tl, pthread_priority_from_wq_class_index(wq, req->tr_priority));
-               tl->th_priority = (uint8_t)req->tr_priority;
-       }
-       if (req->tr_flags & TR_FLAG_OVERCOMMIT) {
-               if ((tl->th_flags & TH_LIST_CONSTRAINED) != 0) {
-                       tl->th_flags &= ~TH_LIST_CONSTRAINED;
-                       wq->wq_constrained_threads_scheduled--;
-               }
-       } else {
-               if ((tl->th_flags & TH_LIST_CONSTRAINED) == 0) {
-                       tl->th_flags |= TH_LIST_CONSTRAINED;
-                       wq->wq_constrained_threads_scheduled++;
-               }
-       }
-
-       if (!parking_tl && !(req->tr_flags & TR_FLAG_NO_PACING)) {
-               _wq_pacing_start(wq, tl);
-       }
-       if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0) {
-               uint32_t old_qos, new_qos;
-
-               /*
-                * If we are scheduling a constrained thread request, we may need to
-                * update the best constrained qos in the thactive atomic state.
-                */
-               for (new_qos = 0; new_qos < WQ_THACTIVE_NO_PENDING_REQUEST; new_qos++) {
-                       if (TAILQ_FIRST(&wq->wq_reqlist[new_qos]))
-                               break;
-               }
-               old_qos = _wq_thactive_best_constrained_req_qos(wq);
-               if (old_qos != new_qos) {
-                       wq_thactive_t v = _wq_thactive_set_best_constrained_req_qos(wq,
-                                       old_qos, new_qos);
-#ifdef __LP64__
-                       PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 2, (uint64_t)v,
-                                       (uint64_t)(v >> 64), 0, 0);
-#else
-                       PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 2, v, 0, 0, 0);
-#endif
-               }
-       }
-       {
-               uint32_t upcall_flags = WQ_FLAG_THREAD_NEWSPI;
-               if (req->tr_flags & TR_FLAG_OVERCOMMIT)
-                       upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT;
-               if (req->tr_flags & TR_FLAG_KEVENT)
-                       upcall_flags |= WQ_FLAG_THREAD_KEVENT;
-               if (req->tr_flags & TR_FLAG_WORKLOOP)
-                       upcall_flags |= WQ_FLAG_THREAD_WORKLOOP | WQ_FLAG_THREAD_KEVENT;
-               if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET)
-                       upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER;
-               tl->th_upcall_flags = upcall_flags >> WQ_FLAG_THREAD_PRIOSHIFT;
-       }
-       if (req->tr_flags & TR_FLAG_KEVENT) {
-               tl->th_flags |= TH_LIST_KEVENT;
-       } else {
-               tl->th_flags &= ~TH_LIST_KEVENT;
-       }
-       return _threadreq_complete_and_unlock(p, wq, req, tl);
-
-done:
-       if (incoming_req) {
-               _threadreq_enqueue(wq, incoming_req);
-       }
-
-exiting:
-
-       if (parking_tl && !(parking_tl->th_flags & TH_LIST_UNBINDING)) {
-               parkit(wq, parking_tl, parking_tl->th_thread);
-               __builtin_unreachable();
-       }
-
-       workqueue_unlock(wq);
-
-       return rc;
-}
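
For readers tracing the removed admission-control path above: overcommit requests bypass the concurrency cap, while constrained requests are only admitted while the count of scheduled constrained threads stays below the limit. A minimal sketch of that bookkeeping, with hypothetical names standing in for wq_constrained_threads_scheduled and wq_max_constrained_threads (an illustration, not the kernel code):

#include <stdbool.h>
#include <stdint.h>

struct wq_model {
        uint32_t constrained_scheduled; /* like wq_constrained_threads_scheduled */
        uint32_t constrained_max;       /* like wq_max_constrained_threads */
};

/* Overcommit requests always pass; constrained ones must stay under the cap. */
static bool
may_admit_request(const struct wq_model *wq, bool overcommit)
{
        if (overcommit) {
                return true;
        }
        return wq->constrained_scheduled < wq->constrained_max;
}
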
-
-/**
- * parked thread wakes up
- */
-static void __dead2
-wq_unpark_continue(void* __unused ptr, wait_result_t wait_result)
-{
-       boolean_t first_use = false;
-       thread_t th = current_thread();
-       proc_t p = current_proc();
-
-       struct uthread *uth = pthread_kern->get_bsdthread_info(th);
-       if (uth == NULL) goto done;
-
-       struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
-       if (wq == NULL) goto done;
-
-       workqueue_lock_spin(wq);
-
-       struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
-       assert(tl != WQ_THREADLIST_EXITING_POISON);
-       if (tl == NULL) {
-               /*
-                * We woke up before addnewthread() was finished setting us up.  Go
-                * ahead and exit, but before we do, poison the threadlist variable so
-                * that addnewthread() doesn't think we are still valid.
-                */
-               pthread_kern->uthread_set_threadlist(uth, WQ_THREADLIST_EXITING_POISON);
-               workqueue_unlock(wq);
-               goto done;
-       }
-
-       assert(tl->th_flags & TH_LIST_INITED);
-
-       if ((tl->th_flags & TH_LIST_NEW)){
-               tl->th_flags &= ~(TH_LIST_NEW);
-               first_use = true;
-       }
-
-       if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
-               /*
-                * The normal wakeup path.
-                */
-               goto return_to_user;
-       }
-
-       if ((tl->th_flags & TH_LIST_RUNNING) == 0 &&
-                       wait_result == THREAD_TIMED_OUT &&
-                       tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET &&
-                       TAILQ_FIRST(&wq->wq_thidlemgrlist) == tl &&
-                       TAILQ_NEXT(tl, th_entry) == NULL){
-               /*
-                * If we are the only idle manager and we popped for self-destruction,
-                * then don't actually exit.  Instead, free our stack to save some
-                * memory and re-park.
-                */
-
-               workqueue_unlock(wq);
-
-               vm_map_t vmap = wq->wq_map;
-
-               // Keep this in sync with _setup_wqthread()
-               const vm_size_t       guardsize = vm_map_page_size(vmap);
-               const user_addr_t     freeaddr = (user_addr_t)tl->th_stackaddr + guardsize;
-               const vm_map_offset_t freesize = vm_map_trunc_page_mask((PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1, vm_map_page_mask(vmap)) - guardsize;
-
-               __assert_only int kr = mach_vm_behavior_set(vmap, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
-#if MACH_ASSERT
-               if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
-                       os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
-               }
-#endif
-
-               workqueue_lock_spin(wq);
-
-               if ( !(tl->th_flags & TH_LIST_RUNNING)) {
-                       thread_set_pending_block_hint(th, kThreadWaitParkedWorkQueue);
-                       assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE));
-
-                       workqueue_unlock(wq);
-
-                       thread_block(wq_unpark_continue);
-                       __builtin_unreachable();
-               }
-       }
-
-       if ((tl->th_flags & TH_LIST_RUNNING) == 0) {
-               assert((tl->th_flags & TH_LIST_BUSY) == 0);
-               if (!first_use) {
-                       PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, 0, 0, 0, 0);
-               }
-               /*
-                * We were set running, but not for the purposes of actually running.
-                * This could be because the timer elapsed.  Or it could be because the
-                * thread aborted.  Either way, we need to return to userspace to exit.
-                *
-                * The call to workqueue_removethread will consume the lock.
-                */
-
-               if (!first_use &&
-                               (tl->th_priority < qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS) ||
-                               (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) {
-                       // Reset the QoS to something low for the pthread cleanup
-                       PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE,
-                                               wq, thread_tid(th),
-                                               (tl->th_priority << 16) | qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS), 3, 0);
-                       pthread_priority_t cleanup_pri = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0);
-                       reset_priority(tl, cleanup_pri);
-               }
-
-               workqueue_removethread(tl, 0, first_use);
-
-               if (first_use){
-                       pthread_kern->thread_bootstrap_return();
-               } else {
-                       pthread_kern->unix_syscall_return(0);
-               }
-               __builtin_unreachable();
-       }
-
-       /*
-        * The timer woke us up or the thread was aborted.  However, we have
-        * already started to make this a runnable thread.  Wait for that to
-        * finish, then continue to userspace.
-        */
-       while ((tl->th_flags & TH_LIST_BUSY)) {
-               assert_wait((caddr_t)tl, (THREAD_UNINT));
-
-               workqueue_unlock(wq);
-
-               thread_block(THREAD_CONTINUE_NULL);
-
-               workqueue_lock_spin(wq);
-       }
-
-return_to_user:
-       if (!first_use) {
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, 0, 0, 0, 0);
-       }
-       if (_wq_pacing_end(wq, tl) && wq->wq_reqcount) {
-               workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
-       } else {
-               workqueue_unlock(wq);
-       }
-       _setup_wqthread(p, th, wq, tl, first_use ? WQ_SETUP_FIRST_USE : 0);
-       pthread_kern->thread_sched_call(th, workqueue_callback);
-done:
-       if (first_use){
-               pthread_kern->thread_bootstrap_return();
-       } else {
-               pthread_kern->unix_syscall_return(EJUSTRETURN);
-       }
-       panic("Our attempt to return to userspace failed...");
-}
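
The park/unpark logic above leans on the Mach continuation idiom: the thread asserts a wait on an event while still holding the lock, drops the lock, and then blocks with a continuation that later runs on a fresh kernel stack, which is why wq_unpark_continue() can never simply return. A schematic of the idiom using the real assert_wait()/thread_block() KPI; park_example, my_continue, and example_resume are placeholder names:

static void __attribute__((noreturn))
my_continue(void *param, wait_result_t wr)
{
        /* Runs on a fresh kernel stack after wakeup; the frame that called
         * thread_block() is gone, so all state must come from param. */
        example_resume(param, wr);
        __builtin_unreachable();
}

static void
park_example(void *event, lck_mtx_t *lock)
{
        /* 1. Declare intent to wait while still holding the lock, so a
         *    wakeup issued after we unlock cannot be lost. */
        assert_wait((event_t)event, THREAD_INTERRUPTIBLE);

        /* 2. Drop the lock before blocking. */
        lck_mtx_unlock(lock);

        /* 3. Block; nothing after this call executes on wakeup. */
        thread_block(my_continue);
        __builtin_unreachable();
}
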
+       if (kevent_data_available == WQ_KEVENT_DATA_SIZE) {
+               workq_thread_set_top_addr(th_addrs, kevent_id_addr);
+       } else {
+               workq_thread_set_top_addr(th_addrs,
+                               kevent_data_buf + kevent_data_available);
+       }
+       *kevent_count_out = kevent_count;
+       *kevent_list_out = kevent_list;
+       return ret;
+}
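
A note on the helper above: the usable stack top depends on whether kevent returned out-of-band data. If the data buffer went untouched (kevent_data_available still equals WQ_KEVENT_DATA_SIZE), the stack may extend right up to the kevent-ID slot; otherwise it must stop below the consumed tail of the data buffer. A standalone sketch of that address arithmetic (the constant is a stand-in for WQ_KEVENT_DATA_SIZE):

#include <stdint.h>

#define KEVENT_DATA_SIZE 2048 /* stand-in for WQ_KEVENT_DATA_SIZE */

/* Highest address the user stack may grow up to, given the kevent area
 * laid out above it. */
static uint64_t
stack_top_for_kevents(uint64_t kevent_id_addr, uint64_t kevent_data_buf,
                uint64_t kevent_data_available)
{
        if (kevent_data_available == KEVENT_DATA_SIZE) {
                /* No out-of-band data was written; reclaim the whole buffer. */
                return kevent_id_addr;
        }
        /* The buffer was consumed from the top; stop below the used part. */
        return kevent_data_buf + kevent_data_available;
}
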
 
 /**
  * configures initial thread stack/registers to jump into:
@@ -3787,282 +840,90 @@ done:
  * |guard page | guardsize
  * |-----------| th_stackaddr
  */
+__attribute__((noreturn,noinline))
 void
-_setup_wqthread(proc_t p, thread_t th, struct workqueue *wq,
-               struct threadlist *tl, int setup_flags)
+workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
+               mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags)
 {
-       int error;
-       if (setup_flags & WQ_SETUP_CLEAR_VOUCHER) {
-               /*
-                * For preemption reasons, we want to reset the voucher as late as
-                * possible, so we do it in two places:
-                *   - Just before parking (i.e. in parkit())
-                *   - Prior to doing the setup for the next workitem (i.e. here)
-                *
-                * Those two places are sufficient to ensure we always reset it before
-                * it goes back out to user space, but be careful to not break that
-                * guarantee.
-                */
-               __assert_only kern_return_t kr;
-               kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
-               assert(kr == KERN_SUCCESS);
-       }
-
-       uint32_t upcall_flags = tl->th_upcall_flags << WQ_FLAG_THREAD_PRIOSHIFT;
-       if (!(setup_flags & WQ_SETUP_FIRST_USE)) {
-               upcall_flags |= WQ_FLAG_THREAD_REUSE;
-       }
-
-       /*
-        * Put the QoS class value into the lower bits of the reuse_thread register; this is
-        * where the thread priority used to be stored anyway.
-        */
-       pthread_priority_t priority = pthread_priority_from_wq_class_index(wq, tl->th_priority);
-       upcall_flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK);
-
-       const vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map);
-       const vm_size_t stack_gap_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_REDZONE_LEN;
-       const vm_size_t stack_align_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_STK_ALIGN;
-
-       user_addr_t pthread_self_addr = (user_addr_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET);
-       user_addr_t stack_top_addr = (user_addr_t)((pthread_self_addr - stack_gap_min) & -stack_align_min);
-       user_addr_t stack_bottom_addr = (user_addr_t)(tl->th_stackaddr + guardsize);
+       struct workq_thread_addrs th_addrs;
+       bool first_use = (setup_flags & WQ_SETUP_FIRST_USE);
+       user_addr_t kevent_list = NULL;
+       int kevent_count = 0;
 
-       user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
-       if (!wqstart_fnptr) {
-               panic("workqueue thread start function pointer is NULL");
-       }
+       workq_thread_get_addrs(map, stackaddr, &th_addrs);
 
-       if (setup_flags & WQ_SETUP_FIRST_USE) {
+       if (first_use) {
                uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
                if (tsd_offset) {
-                       mach_vm_offset_t th_tsd_base = (mach_vm_offset_t)pthread_self_addr + tsd_offset;
-                       kern_return_t kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
+                       mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset;
+                       kern_return_t kret = pthread_kern->thread_set_tsd_base(th,
+                                       th_tsd_base);
                        if (kret == KERN_SUCCESS) {
                                upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
                        }
                }
 
                /*
-               * Pre-fault the first page of the new thread's stack and the page that will
-               * contain the pthread_t structure.
-               */
-               vm_map_t vmap = pthread_kern->current_map();
-               if (vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
-                               vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap))){
-                       vm_fault( vmap,
-                                       vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
-                                       VM_PROT_READ | VM_PROT_WRITE,
-                                       FALSE,
-                                       THREAD_UNINT, NULL, 0);
+                * Pre-fault the first page of the new thread's stack and the page that will
+                * contain the pthread_t structure.
+                */
+               vm_map_offset_t mask = vm_map_page_mask(map);
+               vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask);
+               vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask);
+               if (th_page != stk_page) {
+                       vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
                }
-               vm_fault( vmap,
-                               vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap)),
-                               VM_PROT_READ | VM_PROT_WRITE,
-                               FALSE,
-                               THREAD_UNINT, NULL, 0);
+               vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
        }
 
-       user_addr_t kevent_list = NULL;
-       int kevent_count = 0;
-       if (upcall_flags & WQ_FLAG_THREAD_KEVENT){
-               bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP;
-
-               kevent_list = pthread_self_addr - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
-               kevent_count = WQ_KEVENT_LIST_LEN;
-
-               user_addr_t kevent_id_addr = kevent_list;
-               if (workloop) {
-                       /*
-                        * The kevent ID goes just below the kevent list.  Sufficiently new
-                        * userspace will know to look there.  Old userspace will just
-                        * ignore it.
-                        */
-                       kevent_id_addr -= sizeof(kqueue_id_t);
-               }
-
-               user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE;
-               user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE;
-
-               int32_t events_out = 0;
-
-               assert(tl->th_flags | TH_LIST_KEVENT_BOUND);
+       if (setup_flags & WQ_SETUP_EXIT_THREAD) {
+               kevent_count = WORKQ_EXIT_THREAD_NKEVENT;
+       } else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
                unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
-               if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-                       flags |= KEVENT_FLAG_WORKQ_MANAGER;
-               }
-               int ret = 0;
-               if (workloop) {
-                       flags |= KEVENT_FLAG_WORKLOOP;
-                       kqueue_id_t kevent_id = -1;
-                       ret = kevent_id_internal(p, &kevent_id,
-                                       NULL, 0, kevent_list, kevent_count,
-                                       kevent_data_buf, &kevent_data_available,
-                                       flags, &events_out);
-                       copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id));
-               } else {
-                       flags |= KEVENT_FLAG_WORKQ;
-                       ret = kevent_qos_internal(p,
-                                       class_index_get_thread_qos(tl->th_priority),
-                                       NULL, 0, kevent_list, kevent_count,
-                                       kevent_data_buf, &kevent_data_available,
-                                       flags, &events_out);
-               }
-
-               // squash any errors into just empty output
-               if (ret != KERN_SUCCESS || events_out == -1){
-                       events_out = 0;
-                       kevent_data_available = WQ_KEVENT_DATA_SIZE;
-               }
-
-               // We shouldn't get data out if there aren't events available
-               assert(events_out != 0 || kevent_data_available == WQ_KEVENT_DATA_SIZE);
-
-               if (events_out > 0){
-                       if (kevent_data_available == WQ_KEVENT_DATA_SIZE){
-                               stack_top_addr = (kevent_id_addr - stack_gap_min) & -stack_align_min;
-                       } else {
-                               stack_top_addr = (kevent_data_buf + kevent_data_available - stack_gap_min) & -stack_align_min;
-                       }
-
-                       kevent_count = events_out;
-               } else {
-                       kevent_list = NULL;
-                       kevent_count = 0;
-               }
+               workq_kevent(p, &th_addrs, upcall_flags, NULL, 0, flags,
+                               &kevent_list, &kevent_count);
        }
 
-       PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, 0, 0, 0, 0);
-
-#if defined(__i386__) || defined(__x86_64__)
-       if (proc_is64bit(p) == 0) {
-               x86_thread_state32_t state = {
-                       .eip = (unsigned int)wqstart_fnptr,
-                       .eax = /* arg0 */ (unsigned int)pthread_self_addr,
-                       .ebx = /* arg1 */ (unsigned int)tl->th_thport,
-                       .ecx = /* arg2 */ (unsigned int)stack_bottom_addr,
-                       .edx = /* arg3 */ (unsigned int)kevent_list,
-                       .edi = /* arg4 */ (unsigned int)upcall_flags,
-                       .esi = /* arg5 */ (unsigned int)kevent_count,
-
-                       .esp = (int)((vm_offset_t)stack_top_addr),
-               };
+       workq_set_register_state(p, th, &th_addrs, kport,
+                       kevent_list, upcall_flags, kevent_count);
 
-               error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
-               if (error != KERN_SUCCESS) {
-                       panic(__func__ ": thread_set_wq_state failed: %d", error);
-               }
+       if (first_use) {
+               pthread_kern->thread_bootstrap_return();
        } else {
-               x86_thread_state64_t state64 = {
-                       // x86-64 already passes all the arguments in registers, so we just put them in their final place here
-                       .rip = (uint64_t)wqstart_fnptr,
-                       .rdi = (uint64_t)pthread_self_addr,
-                       .rsi = (uint64_t)tl->th_thport,
-                       .rdx = (uint64_t)stack_bottom_addr,
-                       .rcx = (uint64_t)kevent_list,
-                       .r8  = (uint64_t)upcall_flags,
-                       .r9  = (uint64_t)kevent_count,
-
-                       .rsp = (uint64_t)(stack_top_addr)
-               };
-
-               error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
-               if (error != KERN_SUCCESS) {
-                       panic(__func__ ": thread_set_wq_state failed: %d", error);
-               }
+               pthread_kern->unix_syscall_return(EJUSTRETURN);
        }
-#else
-#error setup_wqthread  not defined for this architecture
-#endif
-}
-
-#if DEBUG
-static int wq_kevent_test SYSCTL_HANDLER_ARGS {
-       //(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
-#pragma unused(oidp, arg1, arg2)
-       int error;
-       struct workq_reqthreads_req_s requests[64] = {};
-
-       if (req->newlen > sizeof(requests) || req->newlen < sizeof(struct workq_reqthreads_req_s))
-               return EINVAL;
-
-       error = copyin(req->newptr, requests, req->newlen);
-       if (error) return error;
-
-       _workq_reqthreads(req->p, (int)(req->newlen / sizeof(struct workq_reqthreads_req_s)), requests);
-
-       return 0;
+       __builtin_unreachable();
 }
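
To tie the layout diagram above to numbers: assuming a 4 KiB guard page, a 512 KiB default stack, and the pthread_t sitting at the top of the region, the addresses workq_setup_thread() derives from th_stackaddr fall out as below (illustrative constants only; the real values come from the kernel headers):

#include <stdint.h>
#include <stdio.h>

#define GUARD_SIZE 0x1000ULL  /* assumed guard page size */
#define STACK_SIZE 0x80000ULL /* assumed default stack size */

int
main(void)
{
        uint64_t stackaddr    = 0x70000000ULL;             /* base of the region */
        uint64_t stack_bottom = stackaddr + GUARD_SIZE;    /* just above the guard */
        uint64_t self         = stack_bottom + STACK_SIZE; /* pthread_t on top */

        printf("guard page : 0x%llx\n", (unsigned long long)stackaddr);
        printf("stack      : 0x%llx - 0x%llx (grows down)\n",
                        (unsigned long long)stack_bottom, (unsigned long long)self);
        printf("pthread_t  : 0x%llx\n", (unsigned long long)self);
        return 0;
}
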
-#endif // DEBUG
-
-#pragma mark - Misc
 
 int
-_fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
+workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
+               user_addr_t stackaddr, mach_port_name_t kport,
+               user_addr_t events, int nevents, int upcall_flags)
 {
-       struct workqueue * wq;
-       int error = 0;
-       int     activecount;
-
-       if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
-               return EINVAL;
-       }
-
-       /*
-        * This is sometimes called from interrupt context by the kperf sampler.
-        * In that case, it's not safe to spin trying to take the lock since we
-        * might already hold it.  So, we just try-lock it and error out if it's
-        * already held.  Since this is just a debugging aid, and all our callers
-        * are able to handle an error, that's fine.
-        */
-       bool locked = workqueue_lock_try(wq);
-       if (!locked) {
-               return EBUSY;
-       }
-
-       activecount = _wq_thactive_aggregate_downto_qos(wq, _wq_thactive(wq),
-                       WORKQUEUE_NUM_BUCKETS - 1, NULL, NULL);
-       pwqinfo->pwq_nthreads = wq->wq_nthreads;
-       pwqinfo->pwq_runthreads = activecount;
-       pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
-       pwqinfo->pwq_state = 0;
-
-       if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
-               pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
-       }
-
-       if (wq->wq_nthreads >= wq_max_threads) {
-               pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
-       }
-
-       workqueue_unlock(wq);
-       return(error);
-}
+       struct workq_thread_addrs th_addrs;
+       user_addr_t kevent_list = NULL;
+       int kevent_count = 0, error;
+       __assert_only kern_return_t kr;
 
-uint32_t
-_get_pwq_state_kdp(proc_t p)
-{
-       if (p == NULL) {
-               return 0;
-       }
+       workq_thread_get_addrs(map, stackaddr, &th_addrs);
 
-       struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
+       unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE |
+                       KEVENT_FLAG_PARKING;
+       error = workq_kevent(p, &th_addrs, upcall_flags, events, nevents, flags,
+                       &kevent_list, &kevent_count);
 
-       if (wq == NULL || workqueue_lock_spin_is_acquired_kdp(wq)) {
-               return 0;
+       if (error || kevent_count == 0) {
+               return error;
        }
 
-       uint32_t pwq_state = WQ_FLAGS_AVAILABLE;
-
-       if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
-               pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
-       }
+       kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
+       assert(kr == KERN_SUCCESS);
 
-       if (wq->wq_nthreads >= wq_max_threads) {
-               pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
-       }
+       workq_set_register_state(p, th, &th_addrs, kport,
+                       kevent_list, upcall_flags, kevent_count);
 
-       return pwq_state;
+       pthread_kern->unix_syscall_return(EJUSTRETURN);
+       __builtin_unreachable();
 }
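
workq_handle_stack_events() returns an error through the normal syscall path only when there is nothing to deliver; once it has committed new register state it exits via unix_syscall_return(EJUSTRETURN), which tells the syscall return path not to overwrite the registers it just set. A hedged illustration of that split (EJUSTRETURN's value is taken from XNU's kernel-private sys/errno.h):

#include <stdbool.h>

#define EJUSTRETURN (-2) /* kernel-private: return to user, leave registers alone */

static int
example_handler(bool committed_registers, int error)
{
        if (error || !committed_registers) {
                return error;       /* ordinary syscall return */
        }
        return EJUSTRETURN;         /* registers already carry the result */
}
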
 
 int
@@ -4083,44 +944,16 @@ _pthread_init(void)
         * allocate the lock attribute for pthread synchronizers
         */
        pthread_lck_attr = lck_attr_alloc_init();
-
        pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
 
        pth_global_hashinit();
        psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
        psynch_zoneinit();
 
-       pthread_zone_workqueue = zinit(sizeof(struct workqueue),
-                       1024 * sizeof(struct workqueue), 8192, "pthread.workqueue");
-       pthread_zone_threadlist = zinit(sizeof(struct threadlist),
-                       1024 * sizeof(struct threadlist), 8192, "pthread.threadlist");
-       pthread_zone_threadreq = zinit(sizeof(struct threadreq),
-                       1024 * sizeof(struct threadreq), 8192, "pthread.threadreq");
-
        int policy_bootarg;
        if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
                pthread_mutex_default_policy = policy_bootarg;
        }
 
-       /*
-        * register sysctls
-        */
-       sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs);
-       sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs);
-       sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs);
-       sysctl_register_oid(&sysctl__kern_wq_max_threads);
-       sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads);
-       sysctl_register_oid(&sysctl__kern_pthread_debug_tracing);
        sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);
-
-#if DEBUG
-       sysctl_register_oid(&sysctl__debug_wq_kevent_test);
-#endif
-
-       for (int i = 0; i < WORKQUEUE_NUM_BUCKETS; i++) {
-               uint32_t thread_qos = _wq_bucket_to_thread_qos(i);
-               wq_max_concurrency[i] = pthread_kern->qos_max_parallelism(thread_qos,
-                               QOS_PARALLELISM_COUNT_LOGICAL);
-       }
-       wq_max_concurrency[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
 }
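
The one sysctl that survives registration in _pthread_init() should be visible from userspace as kern.pthread_mutex_default_policy (name inferred from the OID symbol; verify against the headers). A quick way to read it:

#include <stdio.h>
#include <sys/sysctl.h>

int
main(void)
{
        int policy = 0;
        size_t len = sizeof(policy);

        /* Name assumed from sysctl__kern_pthread_mutex_default_policy. */
        if (sysctlbyname("kern.pthread_mutex_default_policy",
                        &policy, &len, NULL, 0) == 0) {
                printf("default mutex policy: %d\n", policy);
        } else {
                perror("sysctlbyname");
        }
        return 0;
}
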
index 217ddcb7bd00324cd56aa0985208a2e46a524b17..7dabe413562e5a8bb30ef8a14fece8f94631359f 100644 (file)
@@ -69,6 +69,7 @@
 #include <kern/sched_prim.h>
 #include <kern/processor.h>
 #include <kern/block_hint.h>
+#include <kern/turnstile.h>
 //#include <kern/mach_param.h>
 #include <mach/mach_vm.h>
 #include <mach/mach_param.h>
@@ -82,7 +83,6 @@
 #include <libkern/OSAtomic.h>
 
 #include <pexpert/pexpert.h>
-#include <sys/pthread_shims.h>
 
 #include "kern_internal.h"
 #include "synch_internal.h"
@@ -92,9 +92,7 @@ typedef struct uthread *uthread_t;
 
 //#define __FAILEDUSERTEST__(s) do { panic(s); } while (0)
 #define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0)
-
-#define ECVCERORR      256
-#define ECVPERORR      512
+#define __FAILEDUSERTEST2__(s, x...) do { printf("PSYNCH: pid[%d]: " s "\n", proc_pid(current_proc()), x); } while (0)
 
 lck_mtx_t *pthread_list_mlock;
 
@@ -119,17 +117,23 @@ struct ksyn_queue {
 };
 typedef struct ksyn_queue *ksyn_queue_t;
 
-enum {
+typedef enum {
        KSYN_QUEUE_READ = 0,
-       KSYN_QUEUE_WRITER,
+       KSYN_QUEUE_WRITE,
        KSYN_QUEUE_MAX,
-};
+} kwq_queue_type_t;
+
+typedef enum {
+       KWQ_INTR_NONE = 0,
+       KWQ_INTR_READ = 0x1,
+       KWQ_INTR_WRITE = 0x2,
+} kwq_intr_type_t;
 
 struct ksyn_wait_queue {
        LIST_ENTRY(ksyn_wait_queue) kw_hash;
        LIST_ENTRY(ksyn_wait_queue) kw_list;
        user_addr_t kw_addr;
-       uint64_t kw_owner;
+       thread_t kw_owner;              /* current owner or THREAD_NULL, has a +1 */
        uint64_t kw_object;             /* object backing in shared mode */
        uint64_t kw_offset;             /* offset inside the object in shared mode */
        int     kw_pflags;              /* flags under listlock protection */
@@ -151,19 +155,23 @@ struct ksyn_wait_queue {
        uint32_t kw_lastseqword;                /* the last seq that unlocked */
        /* for mutex and cvar we need to track I bit values */
        uint32_t kw_nextseqword;        /* the last seq that unlocked; with num of waiters */
-       uint32_t kw_overlapwatch;       /* chance for overlaps */
-       uint32_t kw_pre_rwwc;           /* prepost count */
-       uint32_t kw_pre_lockseq;        /* prepost target seq */
-       uint32_t kw_pre_sseq;           /* prepost target sword, in cvar used for mutexowned */
-       uint32_t kw_pre_intrcount;      /* prepost of missed wakeup due to intrs */
-       uint32_t kw_pre_intrseq;        /* prepost of missed wakeup limit seq */
-       uint32_t kw_pre_intrretbits;    /* return bits value for missed wakeup threads */
-       uint32_t kw_pre_intrtype;       /* type of failed wakueps*/
+       struct {
+               uint32_t count; /* prepost count */
+               uint32_t lseq; /* prepost target seq */
+               uint32_t sseq; /* prepost target sword, in cvar used for mutexowned */
+       } kw_prepost;
+       struct {
+               kwq_intr_type_t type; /* type of failed wakeups */
+               uint32_t count; /* prepost of missed wakeup due to intrs */
+               uint32_t seq; /* prepost of missed wakeup limit seq */
+               uint32_t returnbits; /* return bits value for missed wakeup threads */
+       } kw_intr;
        
        int     kw_kflags;
        int             kw_qos_override;        /* QoS of max waiter during contention period */
+       struct turnstile *kw_turnstile;
        struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX];        /* queues to hold threads */
-       lck_mtx_t kw_lock;              /* mutex lock protecting this structure */
+       lck_spin_t kw_lock;             /* spinlock protecting this structure */
 };
 typedef struct ksyn_wait_queue * ksyn_wait_queue_t;
 
@@ -189,14 +197,9 @@ typedef struct ksyn_wait_queue * ksyn_wait_queue_t;
 /*
  * Mutex policy attributes
  */
-#define _PTHREAD_MUTEX_POLICY_NONE             0
-#define _PTHREAD_MUTEX_POLICY_FAIRSHARE                0x040   /* 1 */
-#define _PTHREAD_MUTEX_POLICY_FIRSTFIT         0x080   /* 2 */
-#define _PTHREAD_MUTEX_POLICY_REALTIME         0x0c0   /* 3 */
-#define _PTHREAD_MUTEX_POLICY_ADAPTIVE         0x100   /* 4 */
-#define _PTHREAD_MUTEX_POLICY_PRIPROTECT       0x140   /* 5 */
-#define _PTHREAD_MUTEX_POLICY_PRIINHERIT       0x180   /* 6 */
-#define PTHREAD_POLICY_FLAGS_MASK              0x1c0
+#define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE      0x040   /* 1 */
+#define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT       0x080   /* 2 */
+#define _PTHREAD_MTX_OPT_POLICY_MASK           0x1c0
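
The policy therefore lives in bits 6-8 of the mutex options word; checking it is a mask-and-compare, as in this standalone sketch (constants copied locally so it compiles on its own):

#include <stdbool.h>
#include <stdint.h>

#define MTX_OPT_POLICY_FAIRSHARE 0x040 /* policy value 1, shifted left 6 */
#define MTX_OPT_POLICY_FIRSTFIT  0x080 /* policy value 2, shifted left 6 */
#define MTX_OPT_POLICY_MASK      0x1c0 /* the three policy bits */

static bool
mutex_is_firstfit(uint32_t mtx_opts)
{
        return (mtx_opts & MTX_OPT_POLICY_MASK) == MTX_OPT_POLICY_FIRSTFIT;
}
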
 
 /* pflags */
 #define KSYN_WQ_INHASH 2
@@ -205,9 +208,10 @@ typedef struct ksyn_wait_queue * ksyn_wait_queue_t;
 #define KSYN_WQ_FLIST  0X10    /* in free list to be freed after a short delay */
 
 /* kflags */
-#define KSYN_KWF_INITCLEARED   1       /* the init status found and preposts cleared */
-#define KSYN_KWF_ZEROEDOUT     2       /* the lword, etc are inited to 0 */
-#define KSYN_KWF_QOS_APPLIED   4       /* QoS override applied to owner */
+#define KSYN_KWF_INITCLEARED   0x1     /* the init status found and preposts cleared */
+#define KSYN_KWF_ZEROEDOUT     0x2     /* the lword, etc are inited to 0 */
+#define KSYN_KWF_QOS_APPLIED   0x4     /* QoS override applied to owner */
+#define KSYN_KWF_OVERLAP_GUARD 0x8     /* overlap guard */
 
 #define KSYN_CLEANUP_DEADLINE 10
 static int psynch_cleanupset;
@@ -223,47 +227,24 @@ thread_call_t psynch_thcall;
 
 #define KSYN_WQTYPE_MUTEXDROP  (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)
 
-#define KW_UNLOCK_PREPOST              0x01
-#define KW_UNLOCK_PREPOST_READLOCK     0x08
-#define KW_UNLOCK_PREPOST_WRLOCK       0x20
-
-static void
-CLEAR_PREPOST_BITS(ksyn_wait_queue_t kwq)
+static inline int
+_kwq_type(ksyn_wait_queue_t kwq)
 {
-       kwq->kw_pre_lockseq = 0;
-       kwq->kw_pre_sseq = PTHRW_RWS_INIT;
-       kwq->kw_pre_rwwc = 0;
+       return (kwq->kw_type & KSYN_WQTYPE_MASK);
 }
 
-static void
-CLEAR_INTR_PREPOST_BITS(ksyn_wait_queue_t kwq)
+static inline bool
+_kwq_use_turnstile(ksyn_wait_queue_t kwq)
 {
-       kwq->kw_pre_intrcount = 0;
-       kwq->kw_pre_intrseq = 0;
-       kwq->kw_pre_intrretbits = 0;
-       kwq->kw_pre_intrtype = 0;
+       // <rdar://problem/15926625> If we had writer-owner information from the
+       // rwlock then we could use the turnstile to push on it. For now, only
+       // plain mutexes use it.
+       return (_kwq_type(kwq) == KSYN_WQTYPE_MTX);
 }
 
-static void
-CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
-{
-       if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
-               if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
-                       panic("CV:entries in queue durinmg reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount);
-               }
-       };
-       if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
-               kwq->kw_nextseqword = PTHRW_RWS_INIT;
-               kwq->kw_overlapwatch = 0;
-       };
-       CLEAR_PREPOST_BITS(kwq);
-       kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
-       kwq->kw_lastseqword = PTHRW_RWS_INIT;
-       CLEAR_INTR_PREPOST_BITS(kwq);
-       kwq->kw_lword = 0;
-       kwq->kw_uword = 0;
-       kwq->kw_sword = PTHRW_RWS_INIT;
-}
+#define KW_UNLOCK_PREPOST              0x01
+#define KW_UNLOCK_PREPOST_READLOCK     0x08
+#define KW_UNLOCK_PREPOST_WRLOCK       0x20
 
 static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t *object, uint64_t *offset);
 static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype , ksyn_wait_queue_t *wq);
@@ -272,13 +253,11 @@ static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
 
 static int _wait_result_to_errno(wait_result_t result);
 
-static int ksyn_wait(ksyn_wait_queue_t, int, uint32_t, int, uint64_t, thread_continue_t, block_hint_t);
-static kern_return_t ksyn_signal(ksyn_wait_queue_t, int, ksyn_waitq_element_t, uint32_t);
+static int ksyn_wait(ksyn_wait_queue_t, kwq_queue_type_t, uint32_t, int, uint64_t, uint16_t, thread_continue_t, block_hint_t);
+static kern_return_t ksyn_signal(ksyn_wait_queue_t, kwq_queue_type_t, ksyn_waitq_element_t, uint32_t);
 static void ksyn_freeallkwe(ksyn_queue_t kq);
 
-static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t);
-static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t, uint64_t tid, boolean_t prepost);
-static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t);
+static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t, thread_t *);
 
 static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen);
 
@@ -299,8 +278,10 @@ static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t
 static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep);
 static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);
 
-static void psynch_cvcontinue(void *, wait_result_t);
-static void psynch_mtxcontinue(void *, wait_result_t);
+static void __dead2 psynch_cvcontinue(void *, wait_result_t);
+static void __dead2 psynch_mtxcontinue(void *, wait_result_t);
+static void __dead2 psynch_rw_rdcontinue(void *, wait_result_t);
+static void __dead2 psynch_rw_wrcontinue(void *, wait_result_t);
 
 static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp);
 static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]);
@@ -335,6 +316,196 @@ UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc
        }
 }
 
+static inline void
+_kwq_clear_preposted_wakeup(ksyn_wait_queue_t kwq)
+{
+       kwq->kw_prepost.lseq = 0;
+       kwq->kw_prepost.sseq = PTHRW_RWS_INIT;
+       kwq->kw_prepost.count = 0;
+}
+
+static inline void
+_kwq_mark_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t count,
+               uint32_t lseq, uint32_t sseq)
+{
+       kwq->kw_prepost.count = count;
+       kwq->kw_prepost.lseq = lseq;
+       kwq->kw_prepost.sseq = sseq;
+}
+
+static inline void
+_kwq_clear_interrupted_wakeup(ksyn_wait_queue_t kwq)
+{
+       kwq->kw_intr.type = KWQ_INTR_NONE;
+       kwq->kw_intr.count = 0;
+       kwq->kw_intr.seq = 0;
+       kwq->kw_intr.returnbits = 0;
+}
+
+static inline void
+_kwq_mark_interruped_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type,
+               uint32_t count, uint32_t lseq, uint32_t returnbits)
+{
+       kwq->kw_intr.count = count;
+       kwq->kw_intr.seq = lseq;
+       kwq->kw_intr.returnbits = returnbits;
+       kwq->kw_intr.type = type;
+}
+
+static void
+_kwq_destroy(ksyn_wait_queue_t kwq)
+{
+       if (kwq->kw_owner) {
+               thread_deallocate(kwq->kw_owner);
+       }
+       lck_spin_destroy(&kwq->kw_lock, pthread_lck_grp);
+       zfree(kwq_zone, kwq);
+}
+
+#define KWQ_SET_OWNER_TRANSFER_REF  0x1
+
+static inline thread_t
+_kwq_set_owner(ksyn_wait_queue_t kwq, thread_t new_owner, int flags)
+{
+       thread_t old_owner = kwq->kw_owner;
+       if (old_owner == new_owner) {
+               if (flags & KWQ_SET_OWNER_TRANSFER_REF) return new_owner;
+               return THREAD_NULL;
+       }
+       if ((flags & KWQ_SET_OWNER_TRANSFER_REF) == 0) {
+               thread_reference(new_owner);
+       }
+       kwq->kw_owner = new_owner;
+       return old_owner;
+}
+
+static inline thread_t
+_kwq_clear_owner(ksyn_wait_queue_t kwq)
+{
+       return _kwq_set_owner(kwq, THREAD_NULL, KWQ_SET_OWNER_TRANSFER_REF);
+}
+
+static inline void
+_kwq_cleanup_old_owner(thread_t *thread)
+{
+       if (*thread) {
+               thread_deallocate(*thread);
+               *thread = THREAD_NULL;
+       }
+}
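+
_kwq_set_owner() hands back the previous owner together with its +1 reference instead of releasing it inline, so a caller can defer the thread_deallocate() until after the wait-queue spinlock is dropped. An illustrative calling pattern (not a function from this file):

static void
example_transfer_owner(ksyn_wait_queue_t kwq, thread_t new_owner)
{
        thread_t old_owner;

        ksyn_wqlock(kwq);
        /* flags == 0: take a new +1 on new_owner, get the old +1 back */
        old_owner = _kwq_set_owner(kwq, new_owner, 0);
        ksyn_wqunlock(kwq);

        /* Drop the old reference only after the spinlock is released. */
        _kwq_cleanup_old_owner(&old_owner);
}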
+
+static void
+CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
+{
+       if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
+               if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
+                       panic("CV:entries in queue during reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount);
+               }
+       };
+       if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
+               kwq->kw_nextseqword = PTHRW_RWS_INIT;
+               kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD;
+       };
+       _kwq_clear_preposted_wakeup(kwq);
+       kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
+       kwq->kw_lastseqword = PTHRW_RWS_INIT;
+       _kwq_clear_interrupted_wakeup(kwq);
+       kwq->kw_lword = 0;
+       kwq->kw_uword = 0;
+       kwq->kw_sword = PTHRW_RWS_INIT;
+}
+
+static bool
+_kwq_handle_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t type,
+               uint32_t lseq, uint32_t *retval)
+{
+       if (kwq->kw_prepost.count == 0 ||
+                       !is_seqlower_eq(lseq, kwq->kw_prepost.lseq)) {
+               return false;
+       }
+
+       kwq->kw_prepost.count--;
+       if (kwq->kw_prepost.count > 0) {
+               return false;
+       }
+
+       int error, should_block = 0;
+       uint32_t updatebits = 0;
+       uint32_t pp_lseq = kwq->kw_prepost.lseq;
+       uint32_t pp_sseq = kwq->kw_prepost.sseq;
+       _kwq_clear_preposted_wakeup(kwq);
+
+       kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
+
+       error = kwq_handle_unlock(kwq, pp_lseq, pp_sseq, &updatebits,
+                       (type | KW_UNLOCK_PREPOST), &should_block, lseq);
+       if (error) {
+               panic("_kwq_handle_preposted_wakeup: kwq_handle_unlock failed %d",
+                               error);
+       }
+
+       if (should_block) {
+               return false;
+       }
+       *retval = updatebits;
+       return true;
+}
+
+static bool
+_kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t type, uint32_t lgenval, 
+               uint32_t rw_wc, uint32_t *retval)
+{
+       int res = 0;
+
+       // overlaps only occur on read lockers
+       if (type != PTH_RW_TYPE_READ) {
+               return false;
+       }
+
+       // check for overlap and no pending W bit (indicates writers)
+       if ((kwq->kw_kflags & KSYN_KWF_OVERLAP_GUARD) &&
+                       !is_rws_savemask_set(rw_wc) && !is_rwl_wbit_set(lgenval)) {
+               /* overlap is set, so no need to check for valid state for overlap */
+
+               if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
+                       /* increase the next expected seq by one */
+                       kwq->kw_nextseqword += PTHRW_INC;
+                       /* set count by one & bits from the nextseq and add M bit */
+                       *retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
+                       res = 1;
+               }
+       }
+       return res;
+}
+
+static inline bool
+_kwq_is_used(ksyn_wait_queue_t kwq)
+{
+       return (kwq->kw_inqueue != 0 || kwq->kw_prepost.count != 0 ||
+                       kwq->kw_intr.count != 0);
+}
+
+/*
+ * Consumes a pending interrupted waiter and returns true if the current
+ * thread should return to userspace because it was previously
+ * interrupted.
+ */
+static inline bool
+_kwq_handle_interrupted_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type,
+               uint32_t lseq, uint32_t *retval)
+{
+       if (kwq->kw_intr.count != 0 && kwq->kw_intr.type == type &&
+                       (!kwq->kw_intr.seq || is_seqlower_eq(lseq, kwq->kw_intr.seq))) {
+               kwq->kw_intr.count--;
+               *retval = kwq->kw_intr.returnbits;
+               if (kwq->kw_intr.returnbits == 0) {
+                       _kwq_clear_interrupted_wakeup(kwq);
+               }
+               return true;
+       }
+       return false;
+}
+
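
Taken together, these helpers let a wait path consume a wakeup that arrived before the waiter did, instead of blocking on it forever. A simplified sketch of the order in which a locker might consult them (illustrative only; the real callers also do sequence arithmetic and retries):

static bool
example_consume_pending_wakeup(ksyn_wait_queue_t kwq, uint32_t type,
                uint32_t lockseq, uint32_t *retval)
{
        /* 1. A previously interrupted waiter left its wakeup behind. */
        if (_kwq_handle_interrupted_wakeup(kwq, KWQ_INTR_WRITE, lockseq, retval)) {
                return true;    /* return to userspace immediately */
        }
        /* 2. An unlocker preposted a wakeup before this waiter arrived. */
        if (_kwq_handle_preposted_wakeup(kwq, type, lockseq, retval)) {
                return true;
        }
        return false;           /* nothing pending: actually wait */
}
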
 static void
 pthread_list_lock(void)
 {
@@ -350,98 +521,117 @@ pthread_list_unlock(void)
 static void
 ksyn_wqlock(ksyn_wait_queue_t kwq)
 {
-       
-       lck_mtx_lock(&kwq->kw_lock);
+       lck_spin_lock(&kwq->kw_lock);
 }
 
 static void
 ksyn_wqunlock(ksyn_wait_queue_t kwq)
 {
-       lck_mtx_unlock(&kwq->kw_lock);
+       lck_spin_unlock(&kwq->kw_lock);
 }
 
-
 /* routine to drop the mutex unlocks; used both for the mutexunlock system call and for the drop during cond wait */
 static uint32_t
-_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, int flags)
+_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen,
+               int flags)
 {
        kern_return_t ret;
        uint32_t returnbits = 0;
-       int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
+       uint32_t updatebits = 0;
+       int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) ==
+                       _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
        uint32_t nextgen = (ugen + PTHRW_INC);
+       thread_t old_owner = THREAD_NULL;
 
        ksyn_wqlock(kwq);
        kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
-       uint32_t updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT);
 
 redrive:
+       updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) |
+                       (PTH_RWL_EBIT | PTH_RWL_KBIT);
+
        if (firstfit) {
                if (kwq->kw_inqueue == 0) {
-                       // not set or the new lock sequence is higher
-                       if (kwq->kw_pre_rwwc == 0 || is_seqhigher(mgen, kwq->kw_pre_lockseq)) {
-                               kwq->kw_pre_lockseq = (mgen & PTHRW_COUNT_MASK);
-                       }
-                       kwq->kw_pre_rwwc = 1;
-                       ksyn_mtx_drop_qos_override(kwq);
-                       kwq->kw_owner = 0;
-                       // indicate prepost content in kernel
-                       returnbits = mgen | PTH_RWL_PBIT;
+                       uint32_t count = kwq->kw_prepost.count + 1;
+                       // Increment the number of preposters we have waiting
+                       _kwq_mark_preposted_wakeup(kwq, count, mgen & PTHRW_COUNT_MASK, 0);
+                       // We don't know the current owner as we've determined this mutex
+                       // drop should have a preposted locker inbound into the kernel but
+                       // we have no way of knowing who it is. When it arrives, the lock
+                       // path will update the turnstile owner and return it to userspace.
+                       old_owner = _kwq_clear_owner(kwq);
+                       pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL,
+                                       &kwq->kw_turnstile);
+                       PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
+                                       kwq->kw_prepost.lseq, count, 0);
                } else {
                        // signal first waiter
-                       ret = ksyn_mtxsignal(kwq, NULL, updatebits);
+                       ret = ksyn_mtxsignal(kwq, NULL, updatebits, &old_owner);
                        if (ret == KERN_NOT_WAITING) {
+                               // <rdar://problem/39093536> ksyn_mtxsignal attempts to signal
+                               // the thread but it sets up the turnstile inheritor first.
+                               // That means we can't redrive the mutex in a loop without
+                               // dropping the wq lock and cleaning up the turnstile state.
+                               ksyn_wqunlock(kwq);
+                               pthread_kern->psynch_wait_cleanup();
+                               _kwq_cleanup_old_owner(&old_owner);
+                               ksyn_wqlock(kwq);
                                goto redrive;
                        }
                }
        } else {        
-               int prepost = 0;
+               bool prepost = false;
                if (kwq->kw_inqueue == 0) {
                        // No waiters in the queue.
-                       prepost = 1;
+                       prepost = true;
                } else {
-                       uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum & PTHRW_COUNT_MASK);
+                       uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum & PTHRW_COUNT_MASK);
                        if (low_writer == nextgen) {
                                /* next seq to be granted found */
                                /* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
-                               ret = ksyn_mtxsignal(kwq, NULL, updatebits | PTH_RWL_MTX_WAIT);
+                               ret = ksyn_mtxsignal(kwq, NULL,
+                                               updatebits | PTH_RWL_MTX_WAIT, &old_owner);
                                if (ret == KERN_NOT_WAITING) {
                                        /* interrupt post */
-                                       kwq->kw_pre_intrcount = 1;
-                                       kwq->kw_pre_intrseq = nextgen;
-                                       kwq->kw_pre_intrretbits = updatebits;
-                                       kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
+                                       _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1,
+                                                       nextgen, updatebits);
                                }
-                               
                        } else if (is_seqhigher(low_writer, nextgen)) {
-                               prepost = 1;
+                               prepost = true;
                        } else {
                                //__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one is queue\n");
                                ksyn_waitq_element_t kwe;
-                               kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen);
+                               kwe = ksyn_queue_find_seq(kwq,
+                                               &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], nextgen);
                                if (kwe != NULL) {
                                        /* next seq to be granted found */
                                        /* since the grant could be cv, make sure mutex wait is set incase the thread interrupted out */
-                                       ret = ksyn_mtxsignal(kwq, kwe, updatebits | PTH_RWL_MTX_WAIT);
+                                       ret = ksyn_mtxsignal(kwq, kwe,
+                                                       updatebits | PTH_RWL_MTX_WAIT, &old_owner);
                                        if (ret == KERN_NOT_WAITING) {
                                                goto redrive;
                                        }
                                } else {
-                                       prepost = 1;
+                                       prepost = true;
                                }
                        }
                }
                if (prepost) {
-                       ksyn_mtx_drop_qos_override(kwq);
-                       kwq->kw_owner = 0;
-                       if (++kwq->kw_pre_rwwc > 1) {
+                       if (kwq->kw_prepost.count != 0) {
                                __FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n");
                        } else {
-                               kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK);
+                               _kwq_mark_preposted_wakeup(kwq, 1, nextgen & PTHRW_COUNT_MASK,
+                                               0);
                        }
+                       old_owner = _kwq_clear_owner(kwq);
+                       pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL,
+                                       &kwq->kw_turnstile);
                }
        }
-       
+
        ksyn_wqunlock(kwq);
+       pthread_kern->psynch_wait_cleanup();
+       _kwq_cleanup_old_owner(&old_owner);
        ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP);
        return returnbits;
 }
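The prepost machinery this hunk consolidates behind _kwq_mark_preposted_wakeup() and _kwq_clear_preposted_wakeup() covers the race where an unlocker reaches the kernel before the contending waiter does: with nobody queued yet, the wakeup is recorded and the late waiter consumes it on arrival instead of blocking. A minimal userspace model of the idea, with invented names (the real state lives in kwq->kw_prepost and carries more fields):

	#include <stdbool.h>
	#include <stdint.h>

	/* Illustrative model only: one pending wakeup keyed by lock sequence. */
	struct prepost {
		uint32_t count;	/* pending preposted wakeups */
		uint32_t lseq;	/* lock sequence the prepost applies to */
	};

	/* Unlock path: no waiter queued yet, so record the wakeup. */
	static void
	mark_preposted_wakeup(struct prepost *pp, uint32_t lseq)
	{
		pp->count++;
		pp->lseq = lseq;
	}

	/* Lock path: a late waiter consumes the prepost instead of blocking. */
	static bool
	consume_prepost(struct prepost *pp, uint32_t my_lseq)
	{
		if (pp->count != 0 && my_lseq == pp->lseq) {
			pp->count--;
			return true;	/* lock granted without blocking */
		}
		return false;		/* must enqueue and block */
	}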
@@ -460,354 +650,216 @@ _ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval)
        return res;
 }
 
-static int
-_ksyn_handle_missed_wakeups(ksyn_wait_queue_t kwq,
-                           uint32_t type,
-                           uint32_t lockseq,
-                           uint32_t *retval)
-{
-       int res = 0;
-       if (kwq->kw_pre_intrcount != 0 &&
-               kwq->kw_pre_intrtype == type &&
-               (kwq->kw_pre_intrseq == 0 || is_seqlower_eq(lockseq, kwq->kw_pre_intrseq))) {
-               kwq->kw_pre_intrcount--;
-               *retval = kwq->kw_pre_intrretbits;
-               if (kwq->kw_pre_intrcount == 0) {
-                       CLEAR_INTR_PREPOST_BITS(kwq);
-               }
-               res = 1;
-       }
-       return res;
-}
-
-static int
-_ksyn_handle_overlap(ksyn_wait_queue_t kwq,
-                    uint32_t lgenval,
-                    uint32_t rw_wc,
-                    uint32_t *retval)
+/*
+ * psynch_mutexwait: This system call is used for contended psynch mutexes to
+ * block.
+ */
+int
+_psynch_mutexwait(__unused proc_t p, user_addr_t mutex, uint32_t mgen,
+               uint32_t ugen, uint64_t tid, uint32_t flags, uint32_t *retval)
 {
 {
-       int res = 0;
-
-       // check for overlap and no pending W bit (indicates writers)
-       if (kwq->kw_overlapwatch != 0 &&
-           (rw_wc & PTHRW_RWS_SAVEMASK) == 0 &&
-           (lgenval & PTH_RWL_WBIT) == 0) {
-               /* overlap is set, so no need to check for valid state for overlap */
+       ksyn_wait_queue_t kwq;
+       int error = 0;
+       int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK)
+                       == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
+       int ins_flags = SEQFIT;
+       uint32_t lseq = (mgen & PTHRW_COUNT_MASK);
+       uint32_t updatebits = 0;
+       thread_t tid_th = THREAD_NULL, old_owner = THREAD_NULL;
 
-               if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
-                       /* increase the next expected seq by one */
-                       kwq->kw_nextseqword += PTHRW_INC;
-                       /* set count by one & bits from the nextseq and add M bit */
-                       *retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
-                       res = 1;
-               }
+       if (firstfit) {
+               /* first fit */
+               ins_flags = FIRSTFIT;
        }
-       return res;
-}
 
-static int
-_ksyn_handle_prepost(ksyn_wait_queue_t kwq,
-                    uint32_t type,
-                    uint32_t lockseq,
-                    uint32_t *retval)
-{
-       int res = 0;
-       if (kwq->kw_pre_rwwc != 0 && is_seqlower_eq(lockseq, kwq->kw_pre_lockseq)) {
-               kwq->kw_pre_rwwc--;
-               if (kwq->kw_pre_rwwc == 0) {
-                       uint32_t preseq = kwq->kw_pre_lockseq;
-                       uint32_t prerw_wc = kwq->kw_pre_sseq;
-                       CLEAR_PREPOST_BITS(kwq);
-                       if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
-                               kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
-                       }
+       error = ksyn_wqfind(mutex, mgen, ugen, 0, flags,
+                       (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX), &kwq);
+       if (error != 0) {
+               return error;
+       }
 
-                       int error, block;
-                       uint32_t updatebits;
-                       error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (type|KW_UNLOCK_PREPOST), &block, lockseq);
-                       if (error != 0) {
-                               panic("kwq_handle_unlock failed %d\n", error);
-                       }
+again:
+       ksyn_wqlock(kwq);
 
-                       if (block == 0) {
-                               *retval = updatebits;
-                               res = 1;
-                       }
-               }
+       if (_kwq_handle_interrupted_wakeup(kwq, KWQ_INTR_WRITE, lseq, retval)) {
+               old_owner = _kwq_set_owner(kwq, current_thread(), 0);
+               pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner,
+                               &kwq->kw_turnstile);
+               ksyn_wqunlock(kwq);
+               _kwq_cleanup_old_owner(&old_owner);
+               goto out;
        }
        }
-       return res;
-}
 
 
-static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t kwq, uint64_t tid, boolean_t prepost)
-{
-       if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
-               boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
-               int waiter_qos = pthread_kern->proc_usynch_get_requested_thread_qos(current_uthread());
-               
-               kwq->kw_qos_override = MAX(waiter_qos, kwq->kw_qos_override);
-               
-               if (prepost && kwq->kw_inqueue == 0) {
-                       // if there are no more waiters in the queue after the new (prepost-receiving) owner, we do not set an
-                       // override, because the receiving owner may not re-enter the kernel to signal someone else if it is
-                       // the last one to unlock. If other waiters end up entering the kernel, they will boost the owner
-                       tid = 0;
-               }
-               
-               if (tid != 0) {
-                       if ((tid == kwq->kw_owner) && (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED)) {
-                               // hint continues to be accurate, and a boost was already applied
-                               pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, FALSE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-                       } else {
-                               // either hint did not match previous owner, or hint was accurate but mutex was not contended enough for a boost previously
-                               boolean_t boostsucceded;
-                               
-                               boostsucceded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-                               
-                               if (boostsucceded) {
-                                       kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
-                               }
-
-                               if (wasboosted && (tid != kwq->kw_owner) && (kwq->kw_owner != 0)) {
-                                       // the hint did not match the previous owner, so drop overrides
-                                       PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
-                                       pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-                               }
-                       }
-               } else {
-                       // new hint tells us that we don't know the owner, so drop any existing overrides
-                       kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
-                       kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
-
-                       if (wasboosted && (kwq->kw_owner != 0)) {
-                               // the hint did not match the previous owner, so drop overrides
-                               PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
-                               pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
+       if (kwq->kw_prepost.count && (firstfit || (lseq == kwq->kw_prepost.lseq))) {
+               /* got preposted lock */
+               kwq->kw_prepost.count--;
+
+               if (!firstfit) {
+                       if (kwq->kw_prepost.count > 0) {
+                               __FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
+                               kwq->kw_prepost.lseq += PTHRW_INC; /* look for next one */
+                               ksyn_wqunlock(kwq);
+                               error = EINVAL;
+                               goto out;
                        }
                        }
+                       _kwq_clear_preposted_wakeup(kwq);
                }
-       }
-}
 
-static boolean_t
-ksyn_mtx_transfer_qos_override_begin(ksyn_wait_queue_t kwq,
-               ksyn_waitq_element_t kwe, uint64_t *kw_owner)
-{
-       boolean_t needs_commit = FALSE;
-       if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
-               boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
-
-               if (kwq->kw_inqueue > 1) {
-                       boolean_t boostsucceeded;
-
-                       // More than one waiter, so resource will still be contended after handing off ownership
-                       boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), kwe->kwe_uth, 0, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-
-                       if (boostsucceeded) {
-                               kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
-                       }
+               if (kwq->kw_inqueue == 0) {
+                       updatebits = lseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
                } else {
-                       // kw_inqueue == 1 to get to this point, which means there will be no contention after this point
-                       kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
-                       kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
-               }
-
-               // Remove the override that was applied to kw_owner. There may have been a race,
-               // in which case it may not match the current thread
-               if (wasboosted) {
-                       if (kwq->kw_owner == 0) {
-                               PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
-                       } else if (thread_tid(current_thread()) != kwq->kw_owner) {
-                               PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
-                               *kw_owner = kwq->kw_owner;
-                               needs_commit = TRUE;
-                       } else {
-                               *kw_owner = 0;
-                               needs_commit = TRUE;
-                       }
+                       updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) |
+                                       (PTH_RWL_KBIT | PTH_RWL_EBIT);
                }
-       }
-       return needs_commit;
-}
-
-static void
-ksyn_mtx_transfer_qos_override_commit(ksyn_wait_queue_t kwq, uint64_t kw_owner)
-{
-       struct uthread *uthread = kw_owner ? NULL : current_uthread();
-
-       pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(
-                       current_task(), uthread, kw_owner, kwq->kw_addr,
-                       THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-}
+               updatebits &= ~PTH_RWL_MTX_WAIT;
 
-static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t kwq)
-{
-       if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
-               boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
-               
-               // assume nobody else in queue if this routine was called
-               kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
-               kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
-               
-               // Remove the override that was applied to kw_owner. There may have been a race,
-               // in which case it may not match the current thread
-               if (wasboosted) {
-                       if (kwq->kw_owner == 0) {
-                               PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
-                       } else if (thread_tid(current_thread()) != kwq->kw_owner) {
-                               PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
-                               pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-                       } else {
-                               pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), current_uthread(), 0, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-                       }
+               if (updatebits == 0) {
+                       __FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT \n");
                }
-       }
-}
 
-/*
- * psynch_mutexwait: This system call is used for contended psynch mutexes to block.
- */
+               PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
+                               kwq->kw_prepost.lseq, kwq->kw_prepost.count, 1);
 
-int
-_psynch_mutexwait(__unused proc_t p,
-                 user_addr_t mutex,
-                 uint32_t mgen,
-                 uint32_t ugen,
-                 uint64_t tid,
-                 uint32_t flags,
-                 uint32_t *retval)
-{
-       ksyn_wait_queue_t kwq;
-       int error=0;
-       int ins_flags;
-
-       int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
-       uint32_t updatebits = 0;
-
-       uint32_t lockseq = (mgen & PTHRW_COUNT_MASK);
-       
-       if (firstfit == 0) {
-               ins_flags = SEQFIT;
-       } else {
-               /* first fit */
-               ins_flags = FIRSTFIT;
-       }
-       
-       error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX), &kwq);
-       if (error != 0) {
-               return(error);
+               old_owner = _kwq_set_owner(kwq, current_thread(), 0);
+               pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner,
+                               &kwq->kw_turnstile);
+               
+               ksyn_wqunlock(kwq);
+               _kwq_cleanup_old_owner(&old_owner);
+               *retval = updatebits;
+               goto out;
        }
-       
-       ksyn_wqlock(kwq);
 
-       // mutexwait passes in an owner hint at the time userspace contended for the mutex, however, the
-       // owner tid in the userspace data structure may be unset or SWITCHING (-1), or it may correspond
-       // to a stale snapshot after the lock has subsequently been unlocked by another thread.
-       if (tid == 0) {
+       // mutexwait passes in an owner hint at the time userspace contended for
+       // the mutex, however, the owner tid in the userspace data structure may be
+       // unset or SWITCHING (-1), or it may correspond to a stale snapshot after
+       // the lock has subsequently been unlocked by another thread.
+       if (tid == thread_tid(kwq->kw_owner)) {
+               // userspace and kernel agree
+       } else if (tid == 0) {
                // contender came in before owner could write TID
-               tid = 0;
-       } else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && is_seqlower(ugen, kwq->kw_lastunlockseq)) {
-               // owner is stale, someone has come in and unlocked since this contended read the TID, so
-               // assume what is known in the kernel is accurate
-               tid = kwq->kw_owner;
+               // let's assume that what the kernel knows is accurate
+               // for all we know this waiter came in late in the kernel
+       } else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT &&
+                          is_seqlower(ugen, kwq->kw_lastunlockseq)) {
+               // owner is stale, someone has come in and unlocked since this
+               // contended read the TID, so assume what is known in the kernel is
+               // accurate
        } else if (tid == PTHREAD_MTX_TID_SWITCHING) {
-               // userspace didn't know the owner because it was being unlocked, but that unlocker hasn't
-               // reached the kernel yet. So assume what is known in the kernel is accurate
-               tid = kwq->kw_owner;
+               // userspace didn't know the owner because it was being unlocked, but
+               // that unlocker hasn't reached the kernel yet. So assume what is known
+               // in the kernel is accurate
        } else {
-               // hint is being passed in for a specific thread, and we have no reason not to trust
-               // it (like the kernel unlock sequence being higher
-       }
-
-       
-       if (_ksyn_handle_missed_wakeups(kwq, PTH_RW_TYPE_WRITE, lockseq, retval)) {
-               ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
-               kwq->kw_owner = thread_tid(current_thread());
-
-               ksyn_wqunlock(kwq);
-               goto out;
-       }
-       
-       if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK) ))) {
-               /* got preposted lock */
-               kwq->kw_pre_rwwc--;
-               if (kwq->kw_pre_rwwc == 0) {
-                       CLEAR_PREPOST_BITS(kwq);
-                       if (kwq->kw_inqueue == 0) {
-                               updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
-                       } else {
-                               updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT);
-                       }
-                       updatebits &= ~PTH_RWL_MTX_WAIT;
-                       
-                       if (updatebits == 0) {
-                               __FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT \n");
-                       }
-                       
-                       ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
-                       kwq->kw_owner = thread_tid(current_thread());
-            
-                       ksyn_wqunlock(kwq);
-                       *retval = updatebits;
-                       goto out;
-               } else {
-                       __FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
-                       kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */
+               // hint is being passed in for a specific thread, and we have no reason
+               // not to trust it (like the kernel unlock sequence being higher)
+               //
+               // So resolve the hint to a thread_t if we haven't done so yet
+               // and redrive as we dropped the lock
+               if (tid_th == THREAD_NULL) {
                        ksyn_wqunlock(kwq);
-                       error = EINVAL;
-                       goto out;
+                       tid_th = pthread_kern->task_findtid(current_task(), tid);
+                       if (tid_th == THREAD_NULL) tid = 0;
+                       goto again;
                }
+               tid_th = _kwq_set_owner(kwq, tid_th, KWQ_SET_OWNER_TRANSFER_REF);
        }
-       
-       ksyn_mtx_update_owner_qos_override(kwq, tid, FALSE);
-       kwq->kw_owner = tid;
 
-       error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, mgen, ins_flags, 0, psynch_mtxcontinue, kThreadWaitPThreadMutex);
+       if (tid_th) {
+               // We are on our way to block, and can't drop the spinlock anymore
+               pthread_kern->thread_deallocate_safe(tid_th);
+               tid_th = THREAD_NULL;
+       }
+       error = ksyn_wait(kwq, KSYN_QUEUE_WRITE, mgen, ins_flags, 0, 0,
+                       psynch_mtxcontinue, kThreadWaitPThreadMutex);
        // ksyn_wait drops wait queue lock
 out:
-       ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
+       pthread_kern->psynch_wait_cleanup();
+       ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
+       if (tid_th) {
+               thread_deallocate(tid_th);
+       }
        return error;
 }
 
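The owner-hint ladder above is easier to follow outside diff form. Restated as a self-contained sketch (choose_owner() and its arguments are stand-ins, not kernel helpers): the waiter trusts the kernel's view of the owner unless userspace supplies a concrete, non-stale TID, which the code above then resolves to a thread_t via task_findtid() and installs as the turnstile owner:

	#include <stdbool.h>
	#include <stdint.h>

	#define TID_SWITCHING (~0ull)	/* models PTHREAD_MTX_TID_SWITCHING (-1) */

	static uint64_t
	choose_owner(uint64_t hint_tid, uint64_t kernel_tid, bool hint_is_stale)
	{
		if (hint_tid == kernel_tid)
			return kernel_tid;	/* userspace and kernel agree */
		if (hint_tid == 0)
			return kernel_tid;	/* contender raced the owner's TID store */
		if (hint_is_stale)
			return kernel_tid;	/* an unlock already passed this snapshot */
		if (hint_tid == TID_SWITCHING)
			return kernel_tid;	/* unlocker hasn't reached the kernel yet */
		return hint_tid;		/* concrete hint: trust userspace */
	}

Here hint_is_stale corresponds to the kw_lastunlockseq / is_seqlower(ugen, ...) test in the function above.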
-void
+void __dead2
 psynch_mtxcontinue(void *parameter, wait_result_t result)
 {
        uthread_t uth = current_uthread();
        ksyn_wait_queue_t kwq = parameter;
        ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
-       
+
+       ksyn_wqlock(kwq);
+
        int error = _wait_result_to_errno(result);
        if (error != 0) {
-               ksyn_wqlock(kwq);
                if (kwe->kwe_kwqqueue) {
-                       ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
+                       ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe);
                }
-               ksyn_wqunlock(kwq);
        } else {
                uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT;
                pthread_kern->uthread_set_returnval(uth, updatebits);
-               
-               if (updatebits == 0)
+
+               if (updatebits == 0) {
                        __FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT \n");
+               }
        }
-       ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
+
+       pthread_kern->psynch_wait_complete(kwq, &kwq->kw_turnstile);
+
+       ksyn_wqunlock(kwq);
+       pthread_kern->psynch_wait_cleanup();
+       ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
        pthread_kern->unix_syscall_return(error);
+       __builtin_unreachable();
+}
+
+static void __dead2
+_psynch_rw_continue(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi,
+               wait_result_t result)
+{
+       uthread_t uth = current_uthread();
+       ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
+
+       ksyn_wqlock(kwq);
+
+       int error = _wait_result_to_errno(result);
+       if (error != 0) {
+               if (kwe->kwe_kwqqueue) {
+                       ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
+               }
+       } else {
+               pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval);
+       }
+
+       ksyn_wqunlock(kwq);
+       ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));
+
+       pthread_kern->unix_syscall_return(error);
+       __builtin_unreachable();
+}
+
+void __dead2
+psynch_rw_rdcontinue(void *parameter, wait_result_t result)
+{
+       _psynch_rw_continue(parameter, KSYN_QUEUE_READ, result);
+}
+
+void __dead2
+psynch_rw_wrcontinue(void *parameter, wait_result_t result)
+{
+       _psynch_rw_continue(parameter, KSYN_QUEUE_WRITE, result);
 }
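These continuations are all marked __dead2 because a blocking psynch syscall never returns up its own stack: ksyn_wait() parks the thread with a continuation, and when the thread is woken the continuation runs on a fresh kernel stack and must leave through unix_syscall_return(). A schematic of that shape; thread_block_parameter() and wait_result_t are real XNU primitives, the rest are placeholders:

	/* deliver() and wait_result_to_errno() are stand-ins for the
	 * per-primitive result handling seen in the continuations above. */
	static void deliver(void *obj, int error);
	static int wait_result_to_errno(wait_result_t wr);

	static void __dead2
	my_continue(void *parameter, wait_result_t result)
	{
		int error = wait_result_to_errno(result);
		deliver(parameter, error);
		pthread_kern->unix_syscall_return(error);	/* does not return */
		__builtin_unreachable();
	}

	static wait_result_t
	my_wait(void *obj)
	{
		/* control resumes in my_continue(), never after this call */
		return thread_block_parameter(my_continue, obj);
	}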
 
 /*
  * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
  */
 int
-_psynch_mutexdrop(__unused proc_t p,
-                 user_addr_t mutex,
-                 uint32_t mgen,
-                 uint32_t ugen,
-                 uint64_t tid __unused,
-                 uint32_t flags,
-                 uint32_t *retval)
+_psynch_mutexdrop(__unused proc_t p, user_addr_t mutex, uint32_t mgen,
+               uint32_t ugen, uint64_t tid __unused, uint32_t flags, uint32_t *retval)
 {
        int res;
        ksyn_wait_queue_t kwq;
-       
+
        res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq);
        if (res == 0) {
                uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
@@ -821,65 +873,57 @@ _psynch_mutexdrop(__unused proc_t p,
 }
 
 static kern_return_t
-ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t updateval)
+ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe,
+               uint32_t updateval, thread_t *old_owner)
 {
        kern_return_t ret;
-       boolean_t needs_commit;
-       uint64_t kw_owner;
 
        if (!kwe) {
-               kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_kwelist);
+               kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_kwelist);
                if (!kwe) {
                        panic("ksyn_mtxsignal: panic signaling empty queue");
                }
        }
 
-       needs_commit = ksyn_mtx_transfer_qos_override_begin(kwq, kwe, &kw_owner);
-       kwq->kw_owner = kwe->kwe_tid;
-
-       ret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, kwe, updateval);
+       PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_START, kwq->kw_addr, kwe,
+                       thread_tid(kwe->kwe_thread), kwq->kw_inqueue);
 
-       // if waking the new owner failed, remove any overrides
-       if (ret != KERN_SUCCESS) {
-               ksyn_mtx_drop_qos_override(kwq);
-               kwq->kw_owner = 0;
-       } else if (needs_commit) {
-               ksyn_mtx_transfer_qos_override_commit(kwq, kw_owner);
+       ret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, kwe, updateval);
+       if (ret == KERN_SUCCESS) {
+               *old_owner = _kwq_set_owner(kwq, kwe->kwe_thread, 0);
+       } else {
+               *old_owner = _kwq_clear_owner(kwq);
        }
        }
+       PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_END, kwq->kw_addr, kwe,
+                       ret, 0);
        return ret;
 }
 
 
 static void
        return ret;
 }
 
 
 static void
-ksyn_prepost(ksyn_wait_queue_t kwq,
-            ksyn_waitq_element_t kwe,
-            uint32_t state,
+ksyn_prepost(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t state,
             uint32_t lockseq)
 {
        bzero(kwe, sizeof(*kwe));
        kwe->kwe_state = state;
        kwe->kwe_lockseq = lockseq;
        kwe->kwe_count = 1;
             uint32_t lockseq)
 {
        bzero(kwe, sizeof(*kwe));
        kwe->kwe_state = state;
        kwe->kwe_lockseq = lockseq;
        kwe->kwe_count = 1;
-       
-       (void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITER, kwe, lockseq, SEQFIT);
+
+       (void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITE, kwe, lockseq, SEQFIT);
        kwq->kw_fakecount++;
 }
 
 static void
        kwq->kw_fakecount++;
 }
 
 static void
-ksyn_cvsignal(ksyn_wait_queue_t ckwq,
-             thread_t th,
-             uint32_t uptoseq,
-             uint32_t signalseq,
-             uint32_t *updatebits,
-             int *broadcast,
-             ksyn_waitq_element_t *nkwep)
+ksyn_cvsignal(ksyn_wait_queue_t ckwq, thread_t th, uint32_t uptoseq,
+               uint32_t signalseq, uint32_t *updatebits, int *broadcast,
+               ksyn_waitq_element_t *nkwep)
 {
        ksyn_waitq_element_t kwe = NULL;
        ksyn_waitq_element_t nkwe = NULL;
-       ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
-       
+       ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];
+
        uptoseq &= PTHRW_COUNT_MASK;
-       
+
        // Find the specified thread to wake.
        if (th != THREAD_NULL) {
                uthread_t uth = pthread_kern->get_bsdthread_info(th);
@@ -893,7 +937,7 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq,
                        return;
                }
        }
-       
+
        // If no thread was specified, find any thread to wake (with the right
        // sequence number).
        while (th == THREAD_NULL) {
@@ -906,13 +950,13 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq,
                        // reacquiring the lock after allocation in
                        // case anything new shows up.
                        ksyn_wqunlock(ckwq);
-                       nkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
+                       nkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
                        ksyn_wqlock(ckwq);
                } else {
                        break;
                }
        }
-       
+
        if (kwe != NULL) {
                // If we found a thread to wake...
                if (kwe->kwe_state == KWE_THREAD_INWAIT) {
@@ -928,7 +972,7 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq,
                                 */
                                *broadcast = 1;
                        } else {
-                               (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
+                               (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT);
                                *updatebits += PTHRW_INC;
                        }
                } else if (kwe->kwe_state == KWE_THREAD_PREPOST) {
@@ -944,7 +988,7 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq,
                         * If we allocated a new kwe above but then found a different kwe to
                         * use then we need to deallocate the spare one.
                         */
-                       pthread_kern->zfree(kwe_zone, nkwe);
+                       zfree(kwe_zone, nkwe);
                        nkwe = NULL;
                }
        } else if (nkwe != NULL) {
@@ -954,19 +998,14 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq,
        } else {
                panic("failed to allocate kwe\n");
        }
-       
+
        *nkwep = nkwe;
 }
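The allocation dance in ksyn_cvsignal() deserves a note: zalloc() may block, so it cannot run with the kwq spinlock held. The function therefore drops the lock, allocates a spare element, retakes the lock, and rescans in case the queue changed, freeing the spare if it went unused. The same shape as a generic, self-contained sketch (all names invented):

	#include <stdlib.h>

	static void *queue_scan(void) { return NULL; }	/* stand-in: item or NULL */
	static void lock(void) { }	/* spinlock acquire stand-in */
	static void unlock(void) { }	/* spinlock release stand-in */

	static void *
	find_or_alloc(void)
	{
		void *spare = NULL, *item;

		lock();
		while ((item = queue_scan()) == NULL && spare == NULL) {
			unlock();		/* cannot allocate under a spinlock */
			spare = malloc(64);
			lock();			/* retake and rescan: queue may have changed */
		}
		if (item == NULL) {
			item = spare;	/* nothing showed up; consume the spare */
			spare = NULL;
		}
		unlock();
		free(spare);		/* no-op if the spare was consumed */
		return item;
	}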
 
 static int
-__psynch_cvsignal(user_addr_t cv,
-                 uint32_t cgen,
-                 uint32_t cugen,
-                 uint32_t csgen,
-                 uint32_t flags,
-                 int broadcast,
-                 mach_port_name_t threadport,
-                 uint32_t *retval)
+__psynch_cvsignal(user_addr_t cv, uint32_t cgen, uint32_t cugen,
+               uint32_t csgen, uint32_t flags, int broadcast,
+               mach_port_name_t threadport, uint32_t *retval)
 {
        int error = 0;
        thread_t th = THREAD_NULL;
@@ -997,11 +1036,16 @@ __psynch_cvsignal(user_addr_t cv,
                
                // update L, U and S...
                UPDATE_CVKWQ(kwq, cgen, cugen, csgen);
-               
+
+               PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_START, kwq->kw_addr,
+                               fromseq, uptoseq, broadcast);
+
                if (!broadcast) {
                        // No need to signal if the CV is already balanced.
                        if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) {
-                               ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, &broadcast, &nkwe);
+                               ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits,
+                                               &broadcast, &nkwe);
+                               PTHREAD_TRACE(psynch_cvar_signal, kwq->kw_addr, broadcast, 0,0);
                        }
                }
                
@@ -1013,11 +1057,16 @@ __psynch_cvsignal(user_addr_t cv,
                // set C or P bits and free if needed
                ksyn_cvupdate_fixup(kwq, &updatebits);
                *retval = updatebits;
+
+               PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_END, kwq->kw_addr,
+                               updatebits, 0, 0);
                
                ksyn_wqunlock(kwq);
+
+               pthread_kern->psynch_wait_cleanup();
                
                if (nkwe != NULL) {
-                       pthread_kern->zfree(kwe_zone, nkwe);
+                       zfree(kwe_zone, nkwe);
                }
                
                ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
@@ -1034,15 +1083,9 @@ __psynch_cvsignal(user_addr_t cv,
  * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars.
  */
 int
-_psynch_cvbroad(__unused proc_t p,
-               user_addr_t cv,
-               uint64_t cvlsgen,
-               uint64_t cvudgen,
-               uint32_t flags,
-               __unused user_addr_t mutex,
-               __unused uint64_t mugen,
-               __unused uint64_t tid,
-               uint32_t *retval)
+_psynch_cvbroad(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
+               uint64_t cvudgen, uint32_t flags, __unused user_addr_t mutex,
+               __unused uint64_t mugen, __unused uint64_t tid, uint32_t *retval)
 {
        uint32_t diffgen = cvudgen & 0xffffffff;
        uint32_t count = diffgen >> PTHRW_COUNT_SHIFT;
@@ -1062,15 +1105,9 @@ _psynch_cvbroad(__unused proc_t p,
  * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars.
  */
 int
-_psynch_cvsignal(__unused proc_t p,
-                user_addr_t cv,
-                uint64_t cvlsgen,
-                uint32_t cvugen,
-                int threadport,
-                __unused user_addr_t mutex,
-                __unused uint64_t mugen,
-                __unused uint64_t tid,
-                uint32_t flags,
+_psynch_cvsignal(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
+                uint32_t cvugen, int threadport, __unused user_addr_t mutex,
+                __unused uint64_t mugen, __unused uint64_t tid, uint32_t flags,
                 uint32_t *retval)
 {
        uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
@@ -1083,16 +1120,9 @@ _psynch_cvsignal(__unused proc_t p,
  * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel.
  */
 int
-_psynch_cvwait(__unused proc_t p,
-              user_addr_t cv,
-              uint64_t cvlsgen,
-              uint32_t cvugen,
-              user_addr_t mutex,
-              uint64_t mugen,
-              uint32_t flags,
-              int64_t sec,
-              uint32_t nsec,
-              uint32_t *retval)
+_psynch_cvwait(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
+               uint32_t cvugen, user_addr_t mutex, uint64_t mugen, uint32_t flags,
+               int64_t sec, uint32_t nsec, uint32_t *retval)
 {
        int error = 0;
        uint32_t updatebits = 0;
@@ -1118,6 +1148,8 @@ _psynch_cvwait(__unused proc_t p,
                __FAILEDUSERTEST__("psync_cvwait; invalid sequence numbers\n");
                return EINVAL;
        }
+
+       PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_START, cv, mutex, cgen, 0);
        
        error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
        if (error != 0) {
@@ -1125,7 +1157,8 @@ _psynch_cvwait(__unused proc_t p,
        }
        
        if (mutex != 0) {
-               error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, NULL);
+               uint32_t mutexrv = 0;
+               error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, &mutexrv);
                if (error != 0) {
                        goto out;
                }
@@ -1137,7 +1170,7 @@ _psynch_cvwait(__unused proc_t p,
        UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen);
        
        /* Look for the sequence for prepost (or conflicting thread */
-       ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
+       ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];
        kwe = ksyn_queue_find_cvpreposeq(kq, lockseq);
        if (kwe != NULL) {
                if (kwe->kwe_state == KWE_THREAD_PREPOST) {
@@ -1171,7 +1204,7 @@ _psynch_cvwait(__unused proc_t p,
                }
                
                if (error == 0) {
-                       updatebits = PTHRW_INC;
+                       updatebits |= PTHRW_INC;
                        ckwq->kw_sword += PTHRW_INC;
                        
                        /* set C or P bits and free if needed */
@@ -1180,45 +1213,54 @@ _psynch_cvwait(__unused proc_t p,
                }
        } else {
                uint64_t abstime = 0;
+               uint16_t kwe_flags = 0;
 
                if (sec != 0 || (nsec & 0x3fffffff) != 0) {
                        struct timespec ts;
                        ts.tv_sec = (__darwin_time_t)sec;
                        ts.tv_nsec = (nsec & 0x3fffffff);
-                       nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime);
+                       nanoseconds_to_absolutetime(
+                                       (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime);
                        clock_absolutetime_interval_to_deadline(abstime, &abstime);
                }
+
+               PTHREAD_TRACE(psynch_cvar_kwait, cv, mutex, kwe_flags, 1);
                
-               error = ksyn_wait(ckwq, KSYN_QUEUE_WRITER, cgen, SEQFIT, abstime, psynch_cvcontinue, kThreadWaitPThreadCondVar);
+               error = ksyn_wait(ckwq, KSYN_QUEUE_WRITE, cgen, SEQFIT, abstime,
+                               kwe_flags, psynch_cvcontinue, kThreadWaitPThreadCondVar);
                // ksyn_wait drops wait queue lock
        }
        
        ksyn_wqunlock(ckwq);
-       
+
        if (nkwe != NULL) {
-               pthread_kern->zfree(kwe_zone, nkwe);
+               zfree(kwe_zone, nkwe);
        }
 out:
+
+       PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, cv, error, updatebits, 2);
+
        ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
        return error;
 }
 
 
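On the timed path above, the (sec, nsec) pair becomes an absolute Mach deadline before the thread parks; nsec carries flag bits in its upper bits, hence the & 0x3fffffff mask. Roughly the same arithmetic can be reproduced in userspace (the kernel uses nanoseconds_to_absolutetime() plus clock_absolutetime_interval_to_deadline() instead; overflow handling is omitted here):

	#include <mach/mach_time.h>
	#include <stdint.h>

	/* Relative (sec, nsec & 0x3fffffff) -> absolute mach_absolute_time units. */
	static uint64_t
	cvwait_deadline(int64_t sec, uint32_t nsec)
	{
		mach_timebase_info_data_t tb;
		mach_timebase_info(&tb);

		uint64_t rel_ns = (uint64_t)sec * 1000000000ull + (nsec & 0x3fffffff);
		uint64_t rel_abs = rel_ns * tb.denom / tb.numer;	/* ns -> abs units */
		return mach_absolute_time() + rel_abs;
	}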
-void
+void __dead2
 psynch_cvcontinue(void *parameter, wait_result_t result)
 {
        uthread_t uth = current_uthread();
        ksyn_wait_queue_t ckwq = parameter;
        ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
-       
+
        int error = _wait_result_to_errno(result);
        if (error != 0) {
                ksyn_wqlock(ckwq);
                /* just in case it got woken up as we were granting */
-               pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval);
+               int retval = kwe->kwe_psynchretval;
+               pthread_kern->uthread_set_returnval(uth, retval);
 
                if (kwe->kwe_kwqqueue) {
-                       ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
+                       ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe);
                }
                if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
                        /* the condition var granted.
@@ -1231,46 +1273,48 @@ psynch_cvcontinue(void *parameter, wait_result_t result)
                        
                        /* set C and P bits, in the local error */
                        if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
-                               error |= ECVCERORR;
+                               PTHREAD_TRACE(psynch_cvar_zeroed, ckwq->kw_addr,
+                                               ckwq->kw_lword, ckwq->kw_sword, ckwq->kw_inqueue);
+                               error |= ECVCLEARED;
                                if (ckwq->kw_inqueue != 0) {
-                                       ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 1);
+                                       ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 1);
                                }
                                ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
                                ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
                        } else {
                                /* everythig in the queue is a fake entry ? */
                                if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
-                                       error |= ECVPERORR;
+                                       error |= ECVPREPOST;
                                }
                        }
                }
                ksyn_wqunlock(ckwq);
+
+               PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr,
+                               error, 0, 3);
        } else {
                int val = 0;
                // PTH_RWL_MTX_WAIT is removed
                if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) {
                        val = PTHRW_INC | PTH_RWS_CV_CBIT;
                }
+               PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr,
+                               val, 0, 4);
                pthread_kern->uthread_set_returnval(uth, val);
        }
        
        ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
        pthread_kern->unix_syscall_return(error);
+       __builtin_unreachable();
 }
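A subtlety in psynch_cvcontinue(): the "condvar was zeroed out" and "only fake entries remain" conditions are OR'd into the error return as flag bits above the errno range (ECVCLEARED and ECVPREPOST, renamed from ECVCERORR/ECVPERORR), and userspace strips them back out. A toy model of the packing; the XCV* bit values here are invented, the real ones live in the psynch headers:

	#include <assert.h>
	#include <errno.h>

	#define XCVCLEARED	0x100	/* invented stand-in for ECVCLEARED */
	#define XCVPREPOST	0x200	/* invented stand-in for ECVPREPOST */

	int
	main(void)
	{
		int error = ETIMEDOUT;
		error |= XCVCLEARED;	/* kernel side: tag the plain errno */

		/* userspace side: split the flags back out of the errno value */
		int flags = error & (XCVCLEARED | XCVPREPOST);
		int err = error & ~(XCVCLEARED | XCVPREPOST);
		assert(err == ETIMEDOUT && (flags & XCVCLEARED));
		return 0;
	}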
 
 /*
  * psynch_cvclrprepost: This system call clears pending prepost if present.
  */
 int
-_psynch_cvclrprepost(__unused proc_t p,
-                    user_addr_t cv,
-                    uint32_t cvgen,
-                    uint32_t cvugen,
-                    uint32_t cvsgen,
-                    __unused uint32_t prepocnt,
-                    uint32_t preposeq,
-                    uint32_t flags,
-                    int *retval)
+_psynch_cvclrprepost(__unused proc_t p, user_addr_t cv, uint32_t cvgen,
+               uint32_t cvugen, uint32_t cvsgen, __unused uint32_t prepocnt,
+               uint32_t preposeq, uint32_t flags, int *retval)
 {
        int error = 0;
        int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX);
@@ -1279,7 +1323,8 @@ _psynch_cvclrprepost(__unused proc_t p,
        
        *retval = 0;
        
-       error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype, &kwq);
+       error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype,
+                       &kwq);
        if (error != 0) {
                return error;
        }
@@ -1287,16 +1332,19 @@ _psynch_cvclrprepost(__unused proc_t p,
        ksyn_wqlock(kwq);
        
        if (mutex) {
-               int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
-               if (firstfit && kwq->kw_pre_rwwc != 0) {
-                       if (is_seqlower_eq(kwq->kw_pre_lockseq, cvgen)) {
-                               // clear prepost
-                               kwq->kw_pre_rwwc = 0;
-                               kwq->kw_pre_lockseq = 0;
+               int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK)
+                               == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
+               if (firstfit && kwq->kw_prepost.count) {
+                       if (is_seqlower_eq(kwq->kw_prepost.lseq, cvgen)) {
+                               PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
+                                               kwq->kw_prepost.lseq, 0, 2);
+                               _kwq_clear_preposted_wakeup(kwq);
                        }
                }
        } else {
-               ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITER, preposeq, 0);
+               PTHREAD_TRACE(psynch_cvar_clrprepost, kwq->kw_addr, wqtype,
+                               preposeq, 0);
+               ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITE, preposeq, 0);
        }
        
        ksyn_wqunlock(kwq);
@@ -1307,50 +1355,47 @@ _psynch_cvclrprepost(__unused proc_t p,
 /* ***************** pthread_rwlock ************************ */
 
 static int
-__psynch_rw_lock(int type,
-                user_addr_t rwlock,
-                uint32_t lgenval,
-                uint32_t ugenval,
-                uint32_t rw_wc,
-                int flags,
-                uint32_t *retval)
+__psynch_rw_lock(int type, user_addr_t rwlock, uint32_t lgenval,
+                uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
 {
-       int prepost_type, kqi;
+       uint32_t lockseq = lgenval & PTHRW_COUNT_MASK;
+       ksyn_wait_queue_t kwq;
+       int error, prepost_type, kqi;
+       thread_continue_t tc;
 
        if (type == PTH_RW_TYPE_READ) {
                prepost_type = KW_UNLOCK_PREPOST_READLOCK;
                kqi = KSYN_QUEUE_READ;
+               tc = psynch_rw_rdcontinue;
        } else {
                prepost_type = KW_UNLOCK_PREPOST_WRLOCK;
-               kqi = KSYN_QUEUE_WRITER;
+               kqi = KSYN_QUEUE_WRITE;
+               tc = psynch_rw_wrcontinue;
        }
 
-       uint32_t lockseq = lgenval & PTHRW_COUNT_MASK;
+       error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags,
+                       (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq);
+       if (error != 0) {
+               return error;
+       }
 
-       int error;
-       ksyn_wait_queue_t kwq;
-       error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
-       if (error == 0) {
-               ksyn_wqlock(kwq);
-               _ksyn_check_init(kwq, lgenval);
-               if (_ksyn_handle_missed_wakeups(kwq, type, lockseq, retval) ||
-                   // handle overlap first as they are not counted against pre_rwwc
-                   (type == PTH_RW_TYPE_READ && _ksyn_handle_overlap(kwq, lgenval, rw_wc, retval)) ||
-                   _ksyn_handle_prepost(kwq, prepost_type, lockseq, retval)) {
-                       ksyn_wqunlock(kwq);
-               } else {
-                       block_hint_t block_hint = type == PTH_RW_TYPE_READ ?
-                               kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite;
-                       error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, THREAD_CONTINUE_NULL, block_hint);
-                       // ksyn_wait drops wait queue lock
-                       if (error == 0) {
-                               uthread_t uth = current_uthread();
-                               ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
-                               *retval = kwe->kwe_psynchretval;
-                       }
-               }
-               ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK));
+       ksyn_wqlock(kwq);
+       _ksyn_check_init(kwq, lgenval);
+       if (_kwq_handle_interrupted_wakeup(kwq, type, lockseq, retval) ||
+                       // handle overlap first as they are not counted against pre_rwwc
+                       // handle_overlap uses the flags in lgenval (vs. lockseq)
+                       _kwq_handle_overlap(kwq, type, lgenval, rw_wc, retval) ||
+                       _kwq_handle_preposted_wakeup(kwq, prepost_type, lockseq, retval)) {
+               ksyn_wqunlock(kwq);
+               goto out;
        }
+
+       block_hint_t block_hint = type == PTH_RW_TYPE_READ ?
+               kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite;
+       error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, 0, tc, block_hint);
+       // ksyn_wait drops wait queue lock
+out:
+       ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));
        return error;
 }
 
@@ -1358,28 +1403,20 @@ __psynch_rw_lock(int type,
  * psynch_rw_rdlock: This system call is used for psync rwlock readers to block.
  */
 int
-_psynch_rw_rdlock(__unused proc_t p,
-                 user_addr_t rwlock,
-                 uint32_t lgenval,
-                 uint32_t ugenval,
-                 uint32_t rw_wc,
-                 int flags,
-                 uint32_t *retval)
+_psynch_rw_rdlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
+               uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
 {
-       return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, flags, retval);
+       return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc,
+                       flags, retval);
 }
 
 /*
  * psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block.
  */
 int
-_psynch_rw_longrdlock(__unused proc_t p,
-                     __unused user_addr_t rwlock,
-                     __unused uint32_t lgenval,
-                     __unused uint32_t ugenval,
-                     __unused uint32_t rw_wc,
-                     __unused int flags,
-                     __unused uint32_t *retval)
+_psynch_rw_longrdlock(__unused proc_t p, __unused user_addr_t rwlock,
+               __unused uint32_t lgenval, __unused uint32_t ugenval,
+               __unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval)
 {
        return ESRCH;
 }
@@ -1389,28 +1426,20 @@ _psynch_rw_longrdlock(__unused proc_t p,
  * psynch_rw_wrlock: This system call is used for psync rwlock writers to block.
  */
 int
-_psynch_rw_wrlock(__unused proc_t p,
-                 user_addr_t rwlock,
-                 uint32_t lgenval,
-                 uint32_t ugenval,
-                 uint32_t rw_wc,
-                 int flags,
-                 uint32_t *retval)
+_psynch_rw_wrlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
+               uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
 {
-       return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, rw_wc, flags, retval);
+       return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval,
+                       rw_wc, flags, retval);
 }
 
 /*
  * psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block.
  */
 int
-_psynch_rw_yieldwrlock(__unused proc_t p,
-                      __unused user_addr_t rwlock,
-                      __unused uint32_t lgenval,
-                      __unused uint32_t ugenval,
-                      __unused uint32_t rw_wc,
-                      __unused int flags,
-                      __unused uint32_t *retval)
+_psynch_rw_yieldwrlock(__unused proc_t p, __unused user_addr_t rwlock,
+               __unused uint32_t lgenval, __unused uint32_t ugenval,
+               __unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval)
 {
        return ESRCH;
 }
@@ -1420,13 +1449,8 @@ _psynch_rw_yieldwrlock(__unused proc_t p,
  *                     reader/writer variety lock.
  */
 int
-_psynch_rw_unlock(__unused proc_t p,
-                 user_addr_t rwlock,
-                 uint32_t lgenval,
-                 uint32_t ugenval,
-                 uint32_t rw_wc,
-                 int flags,
-                 uint32_t *retval)
+_psynch_rw_unlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
+               uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
 {
        int error = 0;
        ksyn_wait_queue_t kwq;
@@ -1436,7 +1460,8 @@ _psynch_rw_unlock(__unused proc_t p,
        uint32_t curgen = lgenval & PTHRW_COUNT_MASK;
        int clearedkflags = 0;
 
-       error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
+       error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags,
+                       (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
        if (error != 0) {
                return(error);
        }
@@ -1445,7 +1470,8 @@ _psynch_rw_unlock(__unused proc_t p,
        int isinit = _ksyn_check_init(kwq, lgenval);
 
        /* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */
-       if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugenval, kwq->kw_lastunlockseq)!= 0)) {
+       if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) &&
+                       (is_seqlower(ugenval, kwq->kw_lastunlockseq)!= 0)) {
                error = 0;
                goto out;
        }
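
The spurious-unlock guard above relies on wraparound-safe sequence comparison: the psynch generation words are 32-bit counters that wrap, so "lower" has to be judged by signed distance rather than a plain `<`. A minimal userspace sketch of the idea (the helper name mirrors, but is not, the kernel's is_seqlower):

    #include <stdbool.h>
    #include <stdint.h>

    /* a precedes b iff the signed distance from a to b is positive;
     * the cast keeps the comparison correct across 32-bit wraparound */
    static bool
    seq_is_lower(uint32_t a, uint32_t b)
    {
            return (int32_t)(a - b) < 0;
    }

    /* e.g. seq_is_lower(0xfffffff0u, 0x10u) is true: 0x10 is 0x20 ahead */
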
@@ -1466,7 +1492,7 @@ _psynch_rw_unlock(__unused proc_t p,
        
        /* can handle unlock now */
        
-       CLEAR_PREPOST_BITS(kwq);
+       _kwq_clear_preposted_wakeup(kwq);
        
        error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0);
 #if __TESTPANICS__
@@ -1479,26 +1505,25 @@ out:
                *retval = updatebits;
        }
 
-       // <rdar://problem/22244050> If any of the wakeups failed because they already
-       // returned to userspace because of a signal then we need to ensure that the
-       // reset state is not cleared when that thread returns. Otherwise,
+       // <rdar://problem/22244050> If any of the wakeups failed because they
+       // already returned to userspace because of a signal then we need to ensure
+       // that the reset state is not cleared when that thread returns. Otherwise,
        // _pthread_rwlock_lock will clear the interrupted state before it is read.
-       if (clearedkflags != 0 && kwq->kw_pre_intrcount > 0) {
+       if (clearedkflags != 0 && kwq->kw_intr.count > 0) {
                kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
        }
        
        ksyn_wqunlock(kwq);
+       pthread_kern->psynch_wait_cleanup();
        ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));
        
        return(error);
        
 prepost:
        /* update if the new seq is higher than prev prepost, or first set */
-       if (is_rws_setseq(kwq->kw_pre_sseq) ||
-           is_seqhigher_eq(rw_wc, kwq->kw_pre_sseq)) {
-               kwq->kw_pre_rwwc = (diff - count);
-               kwq->kw_pre_lockseq = curgen;
-               kwq->kw_pre_sseq = rw_wc;
+       if (is_rws_sbit_set(kwq->kw_prepost.sseq) ||
+                       is_seqhigher_eq(rw_wc, kwq->kw_prepost.sseq)) {
+               _kwq_mark_preposted_wakeup(kwq, diff - count, curgen, rw_wc);
                updatebits = lgenval;   /* let this not do unlock handling */
        }
        error = 0;
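
For orientation: the prepost branch above replaces the old kw_pre_rwwc/kw_pre_lockseq/kw_pre_sseq triple with one _kwq_mark_preposted_wakeup() call. A toy model of the bookkeeping being recorded (field and function names are illustrative, not the kernel's):

    #include <stdint.h>

    /* An unlock that arrived before its waiters: remember how many
     * wakeups it still owes and at which sequences it applies. */
    struct preposted_wakeup {
            int      count;  /* waiters still owed a wakeup */
            uint32_t lseq;   /* lock sequence the prepost applies to */
            uint32_t sseq;   /* unlock (S) sequence that created it */
    };

    static void
    mark_preposted_wakeup(struct preposted_wakeup *pp, int count,
                    uint32_t lseq, uint32_t sseq)
    {
            pp->count = count;
            pp->lseq = lseq;
            pp->sseq = sseq;
    }

    static void
    clear_preposted_wakeup(struct preposted_wakeup *pp)
    {
            pp->count = 0;
            pp->lseq = 0;
            pp->sseq = 0;
    }
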
@@ -1526,13 +1551,9 @@ _pth_proc_hashinit(proc_t p)
 
 
 static int
-ksyn_wq_hash_lookup(user_addr_t uaddr,
-                   proc_t p,
-                   int flags,
-                   ksyn_wait_queue_t *out_kwq,
-                   struct pthhashhead **out_hashptr,
-                   uint64_t *out_object,
-                   uint64_t *out_offset)
+ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags,
+               ksyn_wait_queue_t *out_kwq, struct pthhashhead **out_hashptr,
+               uint64_t *out_object, uint64_t *out_offset)
 {
        int res = 0;
        ksyn_wait_queue_t kwq;
@@ -1593,9 +1614,8 @@ _pth_proc_hashdelete(proc_t p)
                        pthread_list_unlock();
                        /* release fake entries if present for cvars */
                        if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0))
-                               ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]);
-                       lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
-                       pthread_kern->zfree(kwq_zone, kwq);
+                               ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE]);
+                       _kwq_destroy(kwq);
                        pthread_list_lock();
                }
        }
@@ -1611,14 +1631,49 @@ ksyn_freeallkwe(ksyn_queue_t kq)
        while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
                TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
                if (kwe->kwe_state != KWE_THREAD_INWAIT) {
-                       pthread_kern->zfree(kwe_zone, kwe);
+                       zfree(kwe_zone, kwe);
                }
        }
 }
 
+static inline void
+_kwq_report_inuse(ksyn_wait_queue_t kwq)
+{
+       if (kwq->kw_prepost.count != 0) {
+               __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [pre %d:0x%x:0x%x]",
+                               (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_prepost.count,
+                               kwq->kw_prepost.lseq, kwq->kw_prepost.sseq);
+               PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
+                               kwq->kw_type, 1, 0);
+       }
+       if (kwq->kw_intr.count != 0) {
+               __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [intr %d:0x%x:0x%x:0x%x]",
+                               (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_intr.count,
+                               kwq->kw_intr.type, kwq->kw_intr.seq,
+                               kwq->kw_intr.returnbits);
+               PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
+                               kwq->kw_type, 2, 0);
+       }
+       if (kwq->kw_iocount) {
+               __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [ioc %d:%d]",
+                               (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_iocount,
+                               kwq->kw_dropcount);
+               PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
+                               kwq->kw_type, 3, 0);
+       }
+       if (kwq->kw_inqueue) {
+               __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [inq %d:%d]",
+                               (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_inqueue,
+                               kwq->kw_fakecount);
+               PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, kwq->kw_type,
+                               4, 0);
+       }
+}
+
 /* find kernel waitqueue, if not present create one. Grants a reference  */
 int
-ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int flags, int wqtype, ksyn_wait_queue_t *kwqp)
+ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen,
+               int flags, int wqtype, ksyn_wait_queue_t *kwqp)
 {
        int res = 0;
        ksyn_wait_queue_t kwq = NULL;
@@ -1636,7 +1691,8 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int
 
        while (res == 0) {
                pthread_list_lock();
-               res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, &object, &offset);
+               res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr,
+                               &object, &offset);
                if (res != 0) {
                        pthread_list_unlock();
                        break;
@@ -1645,13 +1701,13 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int
                        // Drop the lock to allocate a new kwq and retry.
                        pthread_list_unlock();
 
-                       nkwq = (ksyn_wait_queue_t)pthread_kern->zalloc(kwq_zone);
+                       nkwq = (ksyn_wait_queue_t)zalloc(kwq_zone);
                        bzero(nkwq, sizeof(struct ksyn_wait_queue));
                        int i;
                        for (i = 0; i < KSYN_QUEUE_MAX; i++) {
                                ksyn_queue_init(&nkwq->kw_ksynqueues[i]);
                        }
-                       lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
+                       lck_spin_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
                        continue;
                } else if (kwq == NULL && nkwq != NULL) {
                        // Still not found, add the new kwq to the hash.
@@ -1671,21 +1727,23 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int
                                kwq->kw_pflags &= ~KSYN_WQ_FLIST;
                        }
                        if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) {
-                               if (kwq->kw_inqueue == 0 && kwq->kw_pre_rwwc == 0 && kwq->kw_pre_intrcount == 0) {
+                               if (!_kwq_is_used(kwq)) {
                                        if (kwq->kw_iocount == 0) {
                                                kwq->kw_type = 0; // mark for reinitialization
-                                       } else if (kwq->kw_iocount == 1 && kwq->kw_dropcount == kwq->kw_iocount) {
+                                       } else if (kwq->kw_iocount == 1 &&
+                                                       kwq->kw_dropcount == kwq->kw_iocount) {
                                                /* if all users are unlockers then wait for it to finish */
                                                kwq->kw_pflags |= KSYN_WQ_WAITING;
                                                // Drop the lock and wait for the kwq to be free.
-                                               (void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0);
+                                               (void)msleep(&kwq->kw_pflags, pthread_list_mlock,
+                                                               PDROP, "ksyn_wqfind", 0);
                                                continue;
                                        } else {
-                                               __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
+                                               _kwq_report_inuse(kwq);
                                                res = EINVAL;
                                        }
                                } else {
-                                       __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
+                                       _kwq_report_inuse(kwq);
                                        res = EINVAL;
                                }
                        }
@@ -1700,9 +1758,13 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int
                                kwq->kw_lword = mgen;
                                kwq->kw_uword = ugen;
                                kwq->kw_sword = sgen;
-                               kwq->kw_owner = 0;
+                               kwq->kw_owner = THREAD_NULL;
                                kwq->kw_kflags = 0;
                                kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
+                               PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_START, uaddr,
+                                               kwq->kw_type, kwq, 0);
+                               PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_END, uaddr,
+                                               mgen, ugen, sgen);
                        }
                        kwq->kw_iocount++;
                        if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
@@ -1716,8 +1778,7 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int
                *kwqp = kwq;
        }
        if (nkwq) {
-               lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
-               pthread_kern->zfree(kwq_zone, nkwq);
+               _kwq_destroy(nkwq);
        }
        return res;
 }
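
The retry loop in ksyn_wqfind drops pthread_list_lock to allocate, repeats the lookup, and only then publishes the new kwq; the `if (nkwq)` branch above discards the spare when another thread won the race. The same shape in a self-contained userspace sketch (a plain list stands in for the hash; all names are illustrative):

    #include <pthread.h>
    #include <stdlib.h>

    struct entry {
            struct entry *next;
            int key;
    };

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct entry *table_head;

    static struct entry *
    table_lookup(int key)
    {
            struct entry *e;
            for (e = table_head; e != NULL; e = e->next) {
                    if (e->key == key) {
                            return e;
                    }
            }
            return NULL;
    }

    static struct entry *
    find_or_create(int key)
    {
            struct entry *e, *fresh = NULL;

            pthread_mutex_lock(&table_lock);
            while ((e = table_lookup(key)) == NULL) {
                    if (fresh != NULL) {
                            /* still absent after the retry: publish ours */
                            fresh->key = key;
                            fresh->next = table_head;
                            table_head = fresh;
                            e = fresh;
                            fresh = NULL;
                            break;
                    }
                    /* drop the lock to allocate, then retry the lookup:
                     * another thread may have inserted the entry meanwhile */
                    pthread_mutex_unlock(&table_lock);
                    fresh = calloc(1, sizeof(*fresh));
                    if (fresh == NULL) {
                            return NULL;
                    }
                    pthread_mutex_lock(&table_lock);
            }
            pthread_mutex_unlock(&table_lock);
            free(fresh);    /* no-op unless we lost the race */
            return e;
    }
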
@@ -1740,7 +1801,16 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
                        wakeup(&kwq->kw_pflags);
                }
                
-               if (kwq->kw_pre_rwwc == 0 && kwq->kw_inqueue == 0 && kwq->kw_pre_intrcount == 0) {
+               if (!_kwq_is_used(kwq)) {
+                       if (kwq->kw_turnstile) {
+                               panic("kw_turnstile still non-null upon release");
+                       }
+
+                       PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_START,
+                                       kwq->kw_addr, kwq->kw_type, qfreenow, 0);
+                       PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_END,
+                                       kwq->kw_addr, kwq->kw_lword, kwq->kw_uword, kwq->kw_sword);
+
                        if (qfreenow == 0) {
                                microuptime(&kwq->kw_ts);
                                LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
@@ -1762,8 +1832,7 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
        }
        pthread_list_unlock();
        if (free_elem != NULL) {
-               lck_mtx_destroy(&free_elem->kw_lock, pthread_lck_grp);
-               pthread_kern->zfree(kwq_zone, free_elem);
+               _kwq_destroy(free_elem);
        }
 }
 
@@ -1771,7 +1840,7 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
 void
 psynch_wq_cleanup(__unused void *param, __unused void * param1)
 {
-       ksyn_wait_queue_t kwq;
+       ksyn_wait_queue_t kwq, tmp;
        struct timeval t;
        int reschedule = 0;
        uint64_t deadline = 0;
@@ -1783,7 +1852,7 @@ psynch_wq_cleanup(__unused void *param, __unused void * param1)
        microuptime(&t);
        
        LIST_FOREACH(kwq, &pth_free_list, kw_list) {
-               if (kwq->kw_iocount != 0 || kwq->kw_pre_rwwc != 0 || kwq->kw_inqueue != 0 || kwq->kw_pre_intrcount != 0) {
+               if (_kwq_is_used(kwq) || kwq->kw_iocount != 0) {
                        // still in use
                        continue;
                }
@@ -1810,10 +1879,8 @@ psynch_wq_cleanup(__unused void *param, __unused void * param1)
        }
        pthread_list_unlock();
 
-       while ((kwq = LIST_FIRST(&freelist)) != NULL) {
-               LIST_REMOVE(kwq, kw_list);
-               lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
-               pthread_kern->zfree(kwq_zone, kwq);
+       LIST_FOREACH_SAFE(kwq, &freelist, kw_list, tmp) {
+               _kwq_destroy(kwq);
        }
 }
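
psynch_wq_cleanup now walks the free list with LIST_FOREACH_SAFE because _kwq_destroy frees the element whose link a plain iterator would dereference next. The same pattern in miniature (BSD <sys/queue.h>; types are illustrative):

    #include <sys/queue.h>
    #include <stdlib.h>

    struct node {
            LIST_ENTRY(node) link;
    };
    LIST_HEAD(nodelist, node);

    /* the _SAFE variant caches the next pointer before the body
     * frees the current element */
    static void
    drain(struct nodelist *head)
    {
            struct node *n, *tmp;
            LIST_FOREACH_SAFE(n, head, link, tmp) {
                    LIST_REMOVE(n, link);
                    free(n);
            }
    }
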
 
@@ -1833,25 +1900,25 @@ _wait_result_to_errno(wait_result_t result)
 }
 
 int
-ksyn_wait(ksyn_wait_queue_t kwq,
-         int kqi,
-         uint32_t lockseq,
-         int fit,
-         uint64_t abstime,
-         thread_continue_t continuation,
-         block_hint_t block_hint)
+ksyn_wait(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, uint32_t lockseq,
+               int fit, uint64_t abstime, uint16_t kwe_flags,
+               thread_continue_t continuation, block_hint_t block_hint)
 {
-       int res;
-
        thread_t th = current_thread();
        uthread_t uth = pthread_kern->get_bsdthread_info(th);
+       struct turnstile **tstore = NULL;
+       int res;
+
+       assert(continuation != THREAD_CONTINUE_NULL);
+
        ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
        bzero(kwe, sizeof(*kwe));
        kwe->kwe_count = 1;
        kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK;
        kwe->kwe_state = KWE_THREAD_INWAIT;
        kwe->kwe_uth = uth;
-       kwe->kwe_tid = thread_tid(th);
+       kwe->kwe_thread = th;
+       kwe->kwe_flags = kwe_flags;
 
        res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit);
        if (res != 0) {
@@ -1859,43 +1926,39 @@ ksyn_wait(ksyn_wait_queue_t kwq,
                ksyn_wqunlock(kwq);
                return res;
        }
-       
-       thread_set_pending_block_hint(th, block_hint);
-       assert_wait_deadline_with_leeway(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, abstime, 0);
+
+       PTHREAD_TRACE(psynch_mutex_kwqwait, kwq->kw_addr, kwq->kw_inqueue,
+                       kwq->kw_prepost.count, kwq->kw_intr.count);
+
+       if (_kwq_use_turnstile(kwq)) {
+               // pthread mutexes and rwlocks both (at least sometimes) know their
+               // owner and can use turnstiles. Otherwise, we pass NULL as the
+               // tstore to the shims so they wait on the global waitq.
+               tstore = &kwq->kw_turnstile;
+       }
+
+       pthread_kern->psynch_wait_prepare((uintptr_t)kwq, tstore, kwq->kw_owner,
+                       block_hint, abstime);
+
        ksyn_wqunlock(kwq);
-       
-       kern_return_t ret;
-       if (continuation == THREAD_CONTINUE_NULL) {
-               ret = thread_block(NULL);
-       } else {
-               ret = thread_block_parameter(continuation, kwq);
-               
-               // If thread_block_parameter returns (interrupted) call the
-               // continuation manually to clean up.
-               continuation(kwq, ret);
-               
-               // NOT REACHED
-               panic("ksyn_wait continuation returned");
+
+       if (tstore) {
+               pthread_kern->psynch_wait_update_complete(kwq->kw_turnstile);
        }
        
-       res = _wait_result_to_errno(ret);
-       if (res != 0) {
-               ksyn_wqlock(kwq);
-               if (kwe->kwe_kwqqueue) {
-                       ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
-               }
-               ksyn_wqunlock(kwq);
-       }
-       return res;
+       thread_block_parameter(continuation, kwq);
+
+       // NOT REACHED
+       panic("ksyn_wait continuation returned");
+       __builtin_unreachable();
 }
 
 kern_return_t
-ksyn_signal(ksyn_wait_queue_t kwq,
-           int kqi,
-           ksyn_waitq_element_t kwe,
-           uint32_t updateval)
+ksyn_signal(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi,
+               ksyn_waitq_element_t kwe, uint32_t updateval)
 {
        kern_return_t ret;
+       struct turnstile **tstore = NULL;
 
        // If no wait element was specified, wake the first.
        if (!kwe) {
@@ -1912,7 +1975,12 @@ ksyn_signal(ksyn_wait_queue_t kwq,
        ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
        kwe->kwe_psynchretval = updateval;
 
-       ret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval);
+       if (_kwq_use_turnstile(kwq)) {
+               tstore = &kwq->kw_turnstile;
+       }
+
+       ret = pthread_kern->psynch_wait_wakeup(kwq, kwe, tstore);
+
        if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) {
                panic("ksyn_signal: panic waking up thread %x\n", ret);
        }
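
Note the ordering in ksyn_signal: kwe_psynchretval is stored before the wakeup is issued, so the woken thread can pick up its return word without retaking the queue lock. A userspace model of that handshake (condition variables instead of the kernel wakeup path; names are illustrative):

    #include <pthread.h>
    #include <stdint.h>

    struct waiter {
            pthread_mutex_t lock;
            pthread_cond_t  cv;
            uint32_t        retval;
            int             woken;
    };

    static uint32_t
    waiter_block(struct waiter *w)
    {
            uint32_t r;

            pthread_mutex_lock(&w->lock);
            while (!w->woken) {
                    pthread_cond_wait(&w->cv, &w->lock);
            }
            r = w->retval;
            pthread_mutex_unlock(&w->lock);
            return r;
    }

    static void
    waiter_signal(struct waiter *w, uint32_t updateval)
    {
            pthread_mutex_lock(&w->lock);
            w->retval = updateval;          /* deposit the result first... */
            w->woken = 1;
            pthread_cond_signal(&w->cv);    /* ...then wake the thread */
            pthread_mutex_unlock(&w->lock);
    }
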
@@ -1925,7 +1993,8 @@ ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
        kern_return_t ret;
        vm_page_info_basic_data_t info;
        mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
-       ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
+       ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr,
+                       VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
        if (ret != KERN_SUCCESS) {
                return EINVAL;
        }
@@ -1943,20 +2012,22 @@ ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
 
 /* lowest of kw_fr, kw_flr, kw_fwr, kw_fywr */
 int
-kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *typep, uint32_t lowest[])
+kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen,
+               int *typep, uint32_t lowest[])
 {
        uint32_t kw_fr, kw_fwr, low;
        int type = 0, lowtype, typenum[2] = { 0 };
        uint32_t numbers[2] = { 0 };
        int count = 0, i;
        
-       
-       if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
+       if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) ||
+                       ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
                type |= PTH_RWSHFT_TYPE_READ;
                /* read entries are present */
                if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) {
                        kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum;
-                       if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0))
+                       if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) &&
+                                       (is_seqlower(premgen, kw_fr) != 0))
                                kw_fr = premgen;
                } else
                        kw_fr = premgen;
@@ -1968,22 +2039,24 @@ kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type
        } else
                lowest[KSYN_QUEUE_READ] = 0;
        
-       if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
+       if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) ||
+                       ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
                type |= PTH_RWSHFT_TYPE_WRITE;
                /* read entries are present */
                type |= PTH_RWSHFT_TYPE_WRITE;
                /* read entries are present */
-               if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
-                       kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum;
-                       if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0))
+               if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) {
+                       kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum;
+                       if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) &&
+                                       (is_seqlower(premgen, kw_fwr) != 0))
                                kw_fwr = premgen;
                } else
                        kw_fwr = premgen;
                
                                kw_fwr = premgen;
                } else
                        kw_fwr = premgen;
                
-               lowest[KSYN_QUEUE_WRITER] = kw_fwr;
+               lowest[KSYN_QUEUE_WRITE] = kw_fwr;
                numbers[count]= kw_fwr;
                typenum[count] = PTH_RW_TYPE_WRITE;
                count++;
        } else
                numbers[count]= kw_fwr;
                typenum[count] = PTH_RW_TYPE_WRITE;
                count++;
        } else
-               lowest[KSYN_QUEUE_WRITER] = 0;
+               lowest[KSYN_QUEUE_WRITE] = 0;
        
 #if __TESTPANICS__
        if (count == 0)
        
 #if __TESTPANICS__
        if (count == 0)
@@ -2009,7 +2082,8 @@ kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type
 
 /* wakeup readers to upto the writer limits */
 int
 
 /* wakeup readers to upto the writer limits */
 int
-ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp)
+ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders,
+               uint32_t updatebits, int *wokenp)
 {
        ksyn_queue_t kq;
        int failedwakeup = 0;
@@ -2020,7 +2094,8 @@ ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, ui
        lbits = updatebits;
        
        kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
-       while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
+       while ((kq->ksynq_count != 0) &&
+                       (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
                kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits);
                if (kret == KERN_NOT_WAITING) {
                        failedwakeup++;
@@ -2034,19 +2109,17 @@ ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, ui
 }
 
 
-/* This handles the unlock grants for next set on rw_unlock() or on arrival of all preposted waiters */
+/*
+ * This handles the unlock grants for next set on rw_unlock() or on arrival
+ * of all preposted waiters.
+ */
 int
 int
-kwq_handle_unlock(ksyn_wait_queue_t kwq,
-                 __unused uint32_t mgen,
-                 uint32_t rw_wc,
-                 uint32_t *updatep,
-                 int flags,
-                 int *blockp,
-                 uint32_t premgen)
+kwq_handle_unlock(ksyn_wait_queue_t kwq, __unused uint32_t mgen, uint32_t rw_wc,
+               uint32_t *updatep, int flags, int *blockp, uint32_t premgen)
 {
        uint32_t low_writer, limitrdnum;
        int rwtype, error=0;
-       int allreaders, failed;
+       int allreaders, nfailed;
        uint32_t updatebits=0, numneeded = 0;;
        int prepost = flags & KW_UNLOCK_PREPOST;
        thread_t preth = THREAD_NULL;
@@ -2067,7 +2140,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq,
        kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
        kwq->kw_lastseqword = rw_wc;
        kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
-       kwq->kw_overlapwatch = 0;
+       kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD;
        
        error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
 #if __TESTPANICS__
@@ -2075,7 +2148,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq,
                panic("rwunlock: cannot fails to slot next round of threads");
 #endif /* __TESTPANICS__ */
        
-       low_writer = lowest[KSYN_QUEUE_WRITER];
+       low_writer = lowest[KSYN_QUEUE_WRITE];
        
        allreaders = 0;
        updatebits = 0;
@@ -2108,7 +2181,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq,
                        } else {
                                // no writers at all
                                // no other waiters only readers
-                               kwq->kw_overlapwatch = 1;
+                               kwq->kw_kflags |= KSYN_KWF_OVERLAP_GUARD;
                                numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
                                if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) {
                                        curthreturns = 1;
@@ -2128,18 +2201,19 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq,
                        }
                        
                        
-                       failed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, updatebits, &woken);
-                       if (failed != 0) {
-                               kwq->kw_pre_intrcount = failed; /* actually a count */
-                               kwq->kw_pre_intrseq = limitrdnum;
-                               kwq->kw_pre_intrretbits = updatebits;
-                               kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
+                       nfailed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders,
+                                       updatebits, &woken);
+                       if (nfailed != 0) {
+                               _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_READ, nfailed,
+                                               limitrdnum, updatebits);
                        }
                        
                        error = 0;
                        
-                       if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0))
+                       if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) && 
+                                       ((updatebits & PTH_RWL_WBIT) == 0)) {
                                panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
+                       }
                }
                        break;
                        
@@ -2151,7 +2225,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq,
                        
                        if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
                                block = 0;
-                               if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
+                               if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) {
                                        updatebits |= PTH_RWL_WBIT;
                                }
                                th = preth;
@@ -2161,23 +2235,23 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq,
                        } else {
                                /* we are not granting writelock to the preposting thread */
                                /* if there are writers present or the preposting write thread then W bit is to be set */
-                               if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 1 ||
+                               if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count > 1 ||
                                    (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) {
                                        updatebits |= PTH_RWL_WBIT;
                                }
                                /* setup next in the queue */
-                               kret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, NULL, updatebits);
+                               kret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, NULL, updatebits);
                                if (kret == KERN_NOT_WAITING) {
-                                       kwq->kw_pre_intrcount = 1;      /* actually a count */
-                                       kwq->kw_pre_intrseq = low_writer;
-                                       kwq->kw_pre_intrretbits = updatebits;
-                                       kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
+                                       _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1,
+                                                       low_writer, updatebits);
                                }
                                error = 0;
                        }
                        kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
-                       if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT))
+                       if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != 
+                                       (PTH_RWL_KBIT | PTH_RWL_EBIT)) {
                                panic("kwq_handle_unlock: writer lock granted but no ke set %x\n", updatebits);
+                       }
                }
                        break;
                        
@@ -2204,7 +2278,8 @@ ksyn_queue_init(ksyn_queue_t kq)
 }
 
 int
-ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int fit)
+ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe,
+               uint32_t mgen, int fit)
 {
        ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
        uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
@@ -2229,11 +2304,13 @@ ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint
                        kq->ksynq_lastnum = lockseq;
                }
        } else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) {
-               /* During prepost when a thread is getting cancelled, we could have two with same seq */
+               /* During prepost when a thread is getting cancelled, we could have
+                * two with same seq */
                res = EBUSY;
                if (kwe->kwe_state == KWE_THREAD_PREPOST) {
                        ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq);
-                       if (tmp != NULL && tmp->kwe_uth != NULL && pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
+                       if (tmp != NULL && tmp->kwe_uth != NULL &&
+                                       pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
                                TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
                                res = 0;
                        }
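
ksyn_queue_insert's SEQFIT path keeps waiters ordered by wrapped sequence number so wakeups can be granted lowest-first. A compact sketch of that ordering discipline (not the kernel code; wraparound is handled by signed distance as before):

    #include <stdint.h>
    #include <sys/queue.h>

    struct seq_item {
            TAILQ_ENTRY(seq_item) link;
            uint32_t seq;
    };
    TAILQ_HEAD(seq_list, seq_item);

    /* insert keeping ascending, wraparound-aware sequence order */
    static void
    seq_insert(struct seq_list *head, struct seq_item *item)
    {
            struct seq_item *cur;

            TAILQ_FOREACH(cur, head, link) {
                    if ((int32_t)(item->seq - cur->seq) < 0) {
                            TAILQ_INSERT_BEFORE(cur, item, link);
                            return;
                    }
            }
            TAILQ_INSERT_TAIL(head, item, link);
    }
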
@@ -2267,7 +2344,8 @@ ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint
 }
 
 void
-ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe)
+ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq,
+               ksyn_waitq_element_t kwe)
 {
        if (kq->ksynq_count == 0) {
                panic("removing item from empty queue");
@@ -2308,7 +2386,8 @@ ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_elemen
 }
 
 ksyn_waitq_element_t
-ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq)
+ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq,
+               uint32_t seq)
 {
        ksyn_waitq_element_t kwe;
        
@@ -2334,7 +2413,8 @@ ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
                        result = kwe;
                        
                        // KWE_THREAD_INWAIT must be strictly equal
-                       if (kwe->kwe_state == KWE_THREAD_INWAIT && (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
+                       if (kwe->kwe_state == KWE_THREAD_INWAIT &&
+                                       (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
                                result = NULL;
                        }
                        break;
@@ -2345,7 +2425,8 @@ ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
 
 /* look for a thread at lockseq, a */
 ksyn_waitq_element_t
-ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq)
+ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq,
+               uint32_t uptoseq, uint32_t signalseq)
 {
        ksyn_waitq_element_t result = NULL;
        ksyn_waitq_element_t q_kwe, r_kwe;
@@ -2358,7 +2439,8 @@ ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint3
                                return result;
                        }
                }
-               if (q_kwe->kwe_state == KWE_THREAD_PREPOST || q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
+               if (q_kwe->kwe_state == KWE_THREAD_PREPOST ||
+                               q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
                        /* match any prepost at our same uptoseq or any broadcast above */
                        if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) {
                                continue;
@@ -2399,6 +2481,10 @@ ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
        ksyn_waitq_element_t kwe;
        uint32_t tseq = upto & PTHRW_COUNT_MASK;
        ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
+       uint32_t freed = 0, signaled = 0;
+
+       PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_START, kwq->kw_addr,
+                       kqi, upto, all);
        
        while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
                if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) {
@@ -2411,17 +2497,28 @@ ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
                         * return them as spurious wait so the cvar state gets
                         * reset correctly.
                         */
+
+                       PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe,
+                                       kwq->kw_inqueue, 1);
                        
                        /* skip canceled ones */
                        /* wake the rest */
                        /* set M bit to indicate to waking CV to retun Inc val */
-                       (void)ksyn_signal(kwq, kqi, kwe, PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
+                       (void)ksyn_signal(kwq, kqi, kwe,
+                                       PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
+                       signaled++;
                } else {
+                       PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe,
+                                       kwq->kw_inqueue, 2);
                        ksyn_queue_remove_item(kwq, kq, kwe);
-                       pthread_kern->zfree(kwe_zone, kwe);
+                       zfree(kwe_zone, kwe);
                        kwq->kw_fakecount--;
+                       freed++;
                }
        }
+
+       PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_END, kwq->kw_addr, freed,
+                       signaled, kwq->kw_inqueue);
 }
 
 /*************************************************************************/
@@ -2483,7 +2580,8 @@ find_nexthighseq(ksyn_wait_queue_t kwq)
 }
 
 int
-find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp)
+find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters,
+               uint32_t *countp)
 {
        int i;
        uint32_t count = 0;
@@ -2540,10 +2638,13 @@ ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep)
 {
        ksyn_waitq_element_t kwe, newkwe;
        uint32_t updatebits = 0;
-       ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
+       ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];
        
        struct ksyn_queue kfreeq;
        ksyn_queue_init(&kfreeq);
+
+       PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_START, ckwq->kw_addr, upto,
+                       ckwq->kw_inqueue, 0);
        
 retry:
        TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
@@ -2555,11 +2656,14 @@ retry:
                if (kwe->kwe_state == KWE_THREAD_INWAIT) {
                        // Wake only non-canceled threads waiting on this CV.
                        if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) {
-                               (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
+                               PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe, 0, 1);
+                               (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT);
                                updatebits += PTHRW_INC;
                        }
                } else if (kwe->kwe_state == KWE_THREAD_BROADCAST ||
                           kwe->kwe_state == KWE_THREAD_PREPOST) {
+                       PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe,
+                                       kwe->kwe_state, 2);
                        ksyn_queue_remove_item(ckwq, kq, kwe);
                        TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
                        ckwq->kw_fakecount--;
@@ -2571,27 +2675,34 @@ retry:
        /* Need to enter a broadcast in the queue (if not already at L == S) */
        
        if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) {
+               PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, ckwq->kw_lword,
+                               ckwq->kw_sword, 3);
+
                newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
                if (newkwe == NULL) {
                        ksyn_wqunlock(ckwq);
-                       newkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
+                       newkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
                        TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
                        ksyn_wqlock(ckwq);
                        goto retry;
                } else {
                        TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
                        ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto);
+                       PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, newkwe, 0, 4);
                }
        }
        
        // free up any remaining things stumbled across above
        while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) {
                TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
-               pthread_kern->zfree(kwe_zone, kwe);
+               zfree(kwe_zone, kwe);
        }
+
+       PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_END, ckwq->kw_addr,
+                       updatebits, 0, 0);
        
        if (updatep != NULL) {
-               *updatep = updatebits;
+               *updatep |= updatebits;
        }
 }
 
@@ -2601,7 +2712,7 @@ ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
        if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
                if (ckwq->kw_inqueue != 0) {
                        /* FREE THE QUEUE */
-                       ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 0);
+                       ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 0);
 #if __TESTPANICS__
                        if (ckwq->kw_inqueue != 0)
                                panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
@@ -2619,8 +2730,10 @@ ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
 void
 psynch_zoneinit(void)
 {
-       kwq_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
-       kwe_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
+       kwq_zone = zinit(sizeof(struct ksyn_wait_queue),
+                       8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
+       kwe_zone = zinit(sizeof(struct ksyn_waitq_element),
+                       8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
 }
 
 void *
@@ -2641,13 +2754,14 @@ _pthread_get_thread_kwq(thread_t thread)
  * to pthread sync objects.
  */
 void
-_pthread_find_owner(thread_t thread, struct stackshot_thread_waitinfo * waitinfo)
+_pthread_find_owner(thread_t thread,
+               struct stackshot_thread_waitinfo * waitinfo)
 {
        ksyn_wait_queue_t kwq = _pthread_get_thread_kwq(thread);
        switch (waitinfo->wait_type) {
                case kThreadWaitPThreadMutex:
                        assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX);
-                       waitinfo->owner   = kwq->kw_owner;
+                       waitinfo->owner   = thread_tid(kwq->kw_owner);
                        waitinfo->context = kwq->kw_addr;
                        break;
                /* Owner of rwlock not stored in kernel space due to races. Punt
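The ksyn_handle_cvbroad() hunks above keep a classic unlock-allocate-retry idiom: if no free ksyn_waitq_element is on hand while the kwq lock is held, the code drops the lock, calls zalloc(), re-takes the lock, and jumps back to retry:, because the queue may have changed in the unlocked window. Below is a minimal user-space sketch of the same pattern, with pthread_mutex and malloc standing in for the kernel lock and zone allocator; every identifier in it is an illustrative placeholder, not libpthread API.

/*
 * Sketch of the unlock-allocate-retry idiom, under the assumptions above.
 * take_elem_locked() is called and returns with queue_lock held.
 */
#include <pthread.h>
#include <stdlib.h>

struct elem { struct elem *next; };

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct elem *freelist;            /* protected by queue_lock */

static struct elem *
take_elem_locked(void)
{
retry:
	if (freelist == NULL) {
		pthread_mutex_unlock(&queue_lock);  /* can't allocate under the lock */
		struct elem *e = malloc(sizeof(*e));
		pthread_mutex_lock(&queue_lock);
		if (e == NULL)
			return NULL;                /* allocation failed */
		e->next = freelist;                 /* stash it and rescan: the list */
		freelist = e;                       /* may have changed while unlocked */
		goto retry;
	}
	struct elem *e = freelist;
	freelist = e->next;
	return e;
}

int main(void)
{
	pthread_mutex_lock(&queue_lock);
	struct elem *e = take_elem_locked();
	pthread_mutex_unlock(&queue_lock);
	free(e);
	return 0;
}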
index e65e7b94d132895e92daa8636cb904fba760f6b9..2e59edc5a818acab9444f362aeee3449f35018a1 100644 (file)
@@ -39,7 +39,9 @@
 // pthread tracing subclasses
 # define _TRACE_SUB_DEFAULT 0
 # define _TRACE_SUB_WORKQUEUE 1
-# define _TRACE_SUB_MUTEX 2
+// WQ_TRACE_REQUESTS_SUBCLASS is 2, in xnu
+# define _TRACE_SUB_MUTEX 3
+# define _TRACE_SUB_CONDVAR 4
 
 #ifndef _PTHREAD_BUILDING_CODES_
 
@@ -62,14 +64,14 @@ VM_UNSLIDE(void* ptr)
     return (void*)unslid_ptr;
 }
 
-# define PTHREAD_TRACE(x,a,b,c,d,e) \
-       { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(x, a, b, c, d, e); } }
+# define PTHREAD_TRACE(x,a,b,c,d) \
+       { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, a, b, c, d, 0); } }
 
-# define PTHREAD_TRACE_WQ(x,a,b,c,d,e) \
-       { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(x, VM_UNSLIDE(a), b, c, d, e); } }
+# define PTHREAD_TRACE_WQ(x,a,b,c,d) \
+       { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, VM_UNSLIDE(a), b, c, d, 0); } }
 
 # define PTHREAD_TRACE_WQ_REQ(x,a,b,c,d,e) \
-       { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(x, VM_UNSLIDE(a), VM_UNSLIDE(b), c, d, e); } }
+       { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, VM_UNSLIDE(a), VM_UNSLIDE(b), c, d, e); } }
 
 #else // KERNEL
 
@@ -138,5 +140,25 @@ TRACE_CODE(psynch_mutex_uunlock, _TRACE_SUB_MUTEX, 0x2);
 TRACE_CODE(psynch_ksyn_incorrect_owner, _TRACE_SUB_MUTEX, 0x3);
 TRACE_CODE(psynch_mutex_lock_updatebits, _TRACE_SUB_MUTEX, 0x4);
 TRACE_CODE(psynch_mutex_unlock_updatebits, _TRACE_SUB_MUTEX, 0x5);
+TRACE_CODE(psynch_mutex_clearprepost, _TRACE_SUB_MUTEX, 0x6);
+TRACE_CODE(psynch_mutex_kwqallocate, _TRACE_SUB_MUTEX, 0x7);
+TRACE_CODE(psynch_mutex_kwqdeallocate, _TRACE_SUB_MUTEX, 0x8);
+TRACE_CODE(psynch_mutex_kwqprepost, _TRACE_SUB_MUTEX, 0x9);
+TRACE_CODE(psynch_mutex_markprepost, _TRACE_SUB_MUTEX, 0x10);
+TRACE_CODE(psynch_mutex_kwqcollision, _TRACE_SUB_MUTEX, 0x11);
+TRACE_CODE(psynch_ffmutex_lock_updatebits, _TRACE_SUB_MUTEX, 0x12);
+TRACE_CODE(psynch_ffmutex_unlock_updatebits, _TRACE_SUB_MUTEX, 0x13);
+TRACE_CODE(psynch_ffmutex_wake, _TRACE_SUB_MUTEX, 0x14);
+TRACE_CODE(psynch_mutex_kwqsignal, _TRACE_SUB_MUTEX, 0x15);
+TRACE_CODE(psynch_ffmutex_wait, _TRACE_SUB_MUTEX, 0x16);
+TRACE_CODE(psynch_mutex_kwqwait, _TRACE_SUB_MUTEX, 0x17);
+
+TRACE_CODE(psynch_cvar_kwait, _TRACE_SUB_CONDVAR, 0x0);
+TRACE_CODE(psynch_cvar_clrprepost, _TRACE_SUB_CONDVAR, 0x1);
+TRACE_CODE(psynch_cvar_freeitems, _TRACE_SUB_CONDVAR, 0x2);
+TRACE_CODE(psynch_cvar_signal, _TRACE_SUB_CONDVAR, 0x3);
+TRACE_CODE(psynch_cvar_broadcast, _TRACE_SUB_CONDVAR, 0x5);
+TRACE_CODE(psynch_cvar_zeroed, _TRACE_SUB_CONDVAR, 0x6);
+TRACE_CODE(psynch_cvar_updateval, _TRACE_SUB_CONDVAR, 0x7);
 
 #endif // _KERN_TRACE_H_
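The reworked PTHREAD_TRACE macros above drop the explicit trailing argument and paste a TRACE_ prefix onto the code name, so call sites name the TRACE_CODE() entry directly. Below is a simplified model of that token-pasting scheme; the enum encoding and the printf stand-in for KERNEL_DEBUG_CONSTANT are illustrative assumptions, not the real kern_trace.h definitions.

/* Simplified model of the tracing macros; encodings are made up. */
#include <stdio.h>

#define TRACE_CODE(name, subclass, code) \
	enum { TRACE_##name = (((subclass) << 8) | ((code) << 2)) }

TRACE_CODE(psynch_cvar_signal, 4 /* _TRACE_SUB_CONDVAR */, 0x3);

static int pthread_debug_tracing = 1;

/* Call sites pass the bare code name; ## pastes on the TRACE_ prefix,
 * and the fifth kdebug argument is now always 0, as in the diff above. */
#define PTHREAD_TRACE(x, a, b, c, d) \
	{ if (pthread_debug_tracing) { \
		printf("0x%x: 0x%lx %lu %lu %lu 0\n", (unsigned)TRACE_##x, \
		    (unsigned long)(a), (unsigned long)(b), \
		    (unsigned long)(c), (unsigned long)(d)); } }

int main(void)
{
	PTHREAD_TRACE(psynch_cvar_signal, 0x7fff5000, 1, 2, 3);
	return 0;
}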
index 6b22c412b7396dd035b46cfbd8997ff499710103..1b9d6c2359e00b2d49c4ed7eda170403b50a93a3 100644 (file)
 #ifndef __SYNCH_INTERNAL_H__
 #define __SYNCH_INTERNAL_H__
 
+// kwe_state
+enum {
+       KWE_THREAD_INWAIT = 1,
+       KWE_THREAD_PREPOST,
+       KWE_THREAD_BROADCAST,
+};
 
 #define _PTHREAD_MTX_OPT_PSHARED 0x010
 #define _PTHREAD_MTX_OPT_NOTIFY 0x1000 /* notify to drop mutex handling in cvwait */
 #define is_rwl_readoverlap(x) (((x) & PTH_RWL_MBIT) != 0)
 
 // S word tests
-#define is_rws_setseq(x) (((x) & PTH_RWS_SBIT))
-#define is_rws_setunlockinit(x) (((x) & PTH_RWS_IBIT))
+#define is_rws_sbit_set(x) (((x) & PTH_RWS_SBIT) != 0)
+#define is_rws_unlockinit_set(x) (((x) & PTH_RWS_IBIT) != 0)
+#define is_rws_savemask_set(x) (((x) & PTHRW_RWS_SAVEMASK) != 0)
+#define is_rws_pbit_set(x) (((x) & PTH_RWS_CV_PBIT) != 0)
+
+// kwe_flags
+#define KWE_FLAG_LOCKPREPOST   0x1 // cvwait caused a lock prepost
 
 static inline int
 is_seqlower(uint32_t x, uint32_t y)
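The renamed S-word predicates above now evaluate to a strict 0-or-1 result instead of a raw masked value. That distinction matters whenever the result lands in a narrow integer field; here is a toy demonstration, where FAKE_SBIT is a placeholder value, not the real PTH_RWS_SBIT.

/* Why "((x) & BIT) != 0" beats "(x) & BIT" as a predicate: a raw masked
 * value can silently truncate to 0 in a narrow type. */
#include <assert.h>
#include <stdint.h>

#define FAKE_SBIT            0x00000100u
#define is_sbit_raw(x)       ((x) & FAKE_SBIT)
#define is_sbit_set(x)       (((x) & FAKE_SBIT) != 0)

int main(void)
{
	uint32_t sword = FAKE_SBIT;
	uint8_t raw = (uint8_t)is_sbit_raw(sword);   /* 0x100 truncates to 0 */
	uint8_t set = (uint8_t)is_sbit_set(sword);   /* always exactly 0 or 1 */
	assert(raw == 0 && set == 1);
	return 0;
}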
index 28d870e529aba81f885c27274791e25de43fa346..c044fe763754faaef4c45d74e8a7d01423649b05 100644 (file)
  * duplicate definitions that used to exist in both projects, when separate.
  */
 
-/* workq_kernreturn commands */
-#define WQOPS_THREAD_RETURN        0x04        /* parks the thread back into the kernel */
-#define WQOPS_QUEUE_NEWSPISUPP     0x10        /* this is to check for newer SPI support */
-#define WQOPS_QUEUE_REQTHREADS     0x20        /* request number of threads of a prio */
-#define WQOPS_QUEUE_REQTHREADS2    0x30        /* request a number of threads in a given priority bucket */
-#define WQOPS_THREAD_KEVENT_RETURN 0x40        /* parks the thread after delivering the passed kevent array */
-#define WQOPS_SET_EVENT_MANAGER_PRIORITY 0x80  /* max() in the provided priority in the the priority of the event manager */
-#define WQOPS_THREAD_WORKLOOP_RETURN 0x100     /* parks the thread after delivering the passed kevent array */
-#define WQOPS_SHOULD_NARROW 0x200      /* checks whether we should narrow our concurrency */
-
-/* flag values for upcall flags field, only 8 bits per struct threadlist */
-#define        WQ_FLAG_THREAD_PRIOMASK                 0x0000ffff
-#define WQ_FLAG_THREAD_PRIOSHIFT               16
-#define        WQ_FLAG_THREAD_OVERCOMMIT               0x00010000      /* thread is with overcommit prio */
-#define        WQ_FLAG_THREAD_REUSE                    0x00020000      /* thread is being reused */
-#define        WQ_FLAG_THREAD_NEWSPI                   0x00040000      /* the call is with new SPIs */
-#define WQ_FLAG_THREAD_KEVENT                  0x00080000  /* thread is response to kevent req */
-#define WQ_FLAG_THREAD_EVENT_MANAGER   0x00100000  /* event manager thread */
-#define WQ_FLAG_THREAD_TSD_BASE_SET            0x00200000  /* tsd base has already been set */
-#define WQ_FLAG_THREAD_WORKLOOP                        0x00400000  /* workloop thread */
-
-#define WQ_THREAD_CLEANUP_QOS QOS_CLASS_DEFAULT
-
-#define WQ_KEVENT_LIST_LEN  16 // WORKQ_KEVENT_EVENT_BUFFER_LEN
-#define WQ_KEVENT_DATA_SIZE (32 * 1024)
-
-/* These definitions are only available to the kext, to avoid bleeding constants and types across the boundary to
- * the userspace library.
- */
-#ifdef KERNEL
-
-/* These defines come from kern/thread.h but are XNU_KERNEL_PRIVATE so do not get
- * exported to kernel extensions.
- */
-#define SCHED_CALL_BLOCK 0x1
-#define SCHED_CALL_UNBLOCK 0x2
-
-// kwe_state
-enum {
-       KWE_THREAD_INWAIT = 1,
-       KWE_THREAD_PREPOST,
-       KWE_THREAD_BROADCAST,
-};
-
-/* old workq priority scheme */
-
-#define WORKQUEUE_HIGH_PRIOQUEUE    0       /* high priority queue */
-#define WORKQUEUE_DEFAULT_PRIOQUEUE 1       /* default priority queue */
-#define WORKQUEUE_LOW_PRIOQUEUE     2       /* low priority queue */
-#define WORKQUEUE_BG_PRIOQUEUE      3       /* background priority queue */
-
-#define WORKQUEUE_NUM_BUCKETS 7
-
 // Sometimes something gets passed a bucket number and we need a way to express
-// that it's actually the event manager.  Use the (n+1)th bucket for that.
-#define WORKQUEUE_EVENT_MANAGER_BUCKET (WORKQUEUE_NUM_BUCKETS-1)
-
-/* wq_max_constrained_threads = max(64, N_CPU * WORKQUEUE_CONSTRAINED_FACTOR)
- * This used to be WORKQUEUE_NUM_BUCKETS + 1 when NUM_BUCKETS was 4, yielding
- * N_CPU * 5. When NUM_BUCKETS changed, we decided that the limit should
- * not change. So the factor is now always 5.
- */
-#define WORKQUEUE_CONSTRAINED_FACTOR 5
-
-#define WORKQUEUE_OVERCOMMIT   0x10000
-
-/*
- * A thread which is scheduled may read its own th_priority field without
- * taking the workqueue lock.  Other fields should be assumed to require the
- * lock.
- */
-struct threadlist {
-       TAILQ_ENTRY(threadlist) th_entry;
-       thread_t th_thread;
-       struct workqueue *th_workq;
-       mach_vm_offset_t th_stackaddr;
-       mach_port_name_t th_thport;
-       uint16_t th_flags;
-       uint8_t th_upcall_flags;
-       uint8_t th_priority;
-};
-
-#define TH_LIST_INITED         0x0001 /* Set at thread creation. */
-#define TH_LIST_RUNNING                0x0002 /* On thrunlist, not parked. */
-#define TH_LIST_KEVENT         0x0004 /* Thread requested by kevent */
-#define TH_LIST_NEW            0x0008 /* First return to userspace */
-#define TH_LIST_BUSY           0x0010 /* Removed from idle list but not ready yet. */
-#define TH_LIST_KEVENT_BOUND   0x0020 /* Thread bound to kqueues */
-#define TH_LIST_CONSTRAINED    0x0040 /* Non-overcommit thread. */
-#define TH_LIST_EVENT_MGR_SCHED_PRI    0x0080 /* Non-QoS Event Manager */
-#define TH_LIST_UNBINDING      0x0100 /* Thread is unbinding during park */
-#define TH_LIST_REMOVING_VOUCHER       0x0200 /* Thread is removing its voucher */
-#define TH_LIST_PACING         0x0400 /* Thread is participating in pacing */
-
-struct threadreq {
-       TAILQ_ENTRY(threadreq) tr_entry;
-       uint16_t tr_flags;
-       uint8_t tr_state;
-       uint8_t tr_priority;
-};
-TAILQ_HEAD(threadreq_head, threadreq);
-
-#define TR_STATE_NEW           0 /* Not yet enqueued */
-#define TR_STATE_WAITING       1 /* Waiting to be serviced - on reqlist */
-#define TR_STATE_COMPLETE      2 /* Request handled - for caller to free */
-#define TR_STATE_DEAD          3
-
-#define TR_FLAG_KEVENT         0x01
-#define TR_FLAG_OVERCOMMIT     0x02
-#define TR_FLAG_ONSTACK                0x04
-#define TR_FLAG_WORKLOOP       0x08
-#define TR_FLAG_NO_PACING      0x10
-
-#if defined(__LP64__)
-typedef unsigned __int128 wq_thactive_t;
-#else
-typedef uint64_t wq_thactive_t;
-#endif
-
-struct workqueue {
-       proc_t          wq_proc;
-       vm_map_t        wq_map;
-       task_t          wq_task;
-
-       lck_spin_t      wq_lock;
-
-       thread_call_t   wq_atimer_delayed_call;
-       thread_call_t   wq_atimer_immediate_call;
-
-       uint32_t _Atomic wq_flags;
-       uint32_t        wq_timer_interval;
-       uint32_t        wq_threads_scheduled;
-       uint32_t        wq_constrained_threads_scheduled;
-       uint32_t        wq_nthreads;
-       uint32_t        wq_thidlecount;
-       uint32_t        wq_event_manager_priority;
-       uint8_t         wq_lflags; // protected by wqueue lock
-       uint8_t         wq_paced; // protected by wqueue lock
-       uint16_t    __wq_unused;
-
-       TAILQ_HEAD(, threadlist) wq_thrunlist;
-       TAILQ_HEAD(, threadlist) wq_thidlelist;
-       TAILQ_HEAD(, threadlist) wq_thidlemgrlist;
-
-       uint32_t        wq_reqcount;    /* number of elements on the following lists */
-       struct threadreq_head wq_overcommit_reqlist[WORKQUEUE_EVENT_MANAGER_BUCKET];
-       struct threadreq_head wq_reqlist[WORKQUEUE_EVENT_MANAGER_BUCKET];
-       struct threadreq wq_event_manager_threadreq;
-
-       struct threadreq *wq_cached_threadreq;
-
-       uint16_t        wq_thscheduled_count[WORKQUEUE_NUM_BUCKETS];
-       _Atomic wq_thactive_t wq_thactive;
-       _Atomic uint64_t wq_lastblocked_ts[WORKQUEUE_NUM_BUCKETS];
-};
-#define WQ_EXITING             0x01
-#define WQ_ATIMER_DELAYED_RUNNING      0x02
-#define WQ_ATIMER_IMMEDIATE_RUNNING    0x04
-
-#define WQL_ATIMER_BUSY                0x01
-#define WQL_ATIMER_WAITING     0x02
-
-#define WORKQUEUE_MAXTHREADS           512
-#define WQ_STALLED_WINDOW_USECS                200
-#define WQ_REDUCE_POOL_WINDOW_USECS    5000000
-#define        WQ_MAX_TIMER_INTERVAL_USECS     50000
-
-#define WQ_THREADLIST_EXITING_POISON (void *)~0ul
-
-#endif // KERNEL
+// that it's actually the event manager.  Use the (0)th bucket for that.
+#define WORKQ_THREAD_QOS_MIN        (THREAD_QOS_MAINTENANCE)
+#define WORKQ_THREAD_QOS_MAX        (THREAD_QOS_LAST - 1)
+#define WORKQ_THREAD_QOS_CLEANUP    (THREAD_QOS_LEGACY)
+#define WORKQ_THREAD_QOS_MANAGER    (THREAD_QOS_LAST) // outside of MIN/MAX
+
+#define WORKQ_NUM_QOS_BUCKETS       (WORKQ_THREAD_QOS_MAX)
+#define WORKQ_NUM_BUCKETS           (WORKQ_THREAD_QOS_MAX + 1)
+#define WORKQ_IDX(qos)              ((qos) - 1) // 0 based index
+
+// magical `nkevents` values for _pthread_wqthread
+#define WORKQ_EXIT_THREAD_NKEVENT   (-1)
 
 #endif // _WORKQUEUE_INTERNAL_H_
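The new constants above derive the bucket layout from the kernel's THREAD_QOS_* scale rather than the old fixed priority queues, with WORKQ_IDX() converting a 1-based QoS value to a 0-based bucket index. Below is a small sanity check of that arithmetic; it assumes the contemporary xnu values THREAD_QOS_MAINTENANCE == 1, THREAD_QOS_LEGACY == 4, and THREAD_QOS_LAST == 7, which are defined in xnu, not in this header.

/* Sanity-check of the QoS-bucket arithmetic under the assumed xnu values. */
#include <assert.h>

#define THREAD_QOS_MAINTENANCE      1
#define THREAD_QOS_LEGACY           4
#define THREAD_QOS_LAST             7

#define WORKQ_THREAD_QOS_MIN        (THREAD_QOS_MAINTENANCE)
#define WORKQ_THREAD_QOS_MAX        (THREAD_QOS_LAST - 1)
#define WORKQ_THREAD_QOS_CLEANUP    (THREAD_QOS_LEGACY)
#define WORKQ_THREAD_QOS_MANAGER    (THREAD_QOS_LAST)

#define WORKQ_NUM_QOS_BUCKETS       (WORKQ_THREAD_QOS_MAX)
#define WORKQ_NUM_BUCKETS           (WORKQ_THREAD_QOS_MAX + 1)
#define WORKQ_IDX(qos)              ((qos) - 1)

int main(void)
{
	assert(WORKQ_NUM_QOS_BUCKETS == 6);   /* six QoS buckets */
	assert(WORKQ_NUM_BUCKETS == 7);       /* plus the manager bucket */
	assert(WORKQ_IDX(WORKQ_THREAD_QOS_MIN) == 0);
	assert(WORKQ_IDX(WORKQ_THREAD_QOS_MANAGER) == WORKQ_NUM_BUCKETS - 1);
	return 0;
}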
index 33df53770ed9fd472894ec34b84a3cd3e4aaaba0..1c4fd1a07292ce8965a0a5ba9daf68fccd0f5044 100644 (file)
 /* End PBXAggregateTarget section */
 
 /* Begin PBXBuildFile section */
+               6E2A3BBE2101222F0003B53B /* stack_np.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E2A3BBD210122230003B53B /* stack_np.h */; settings = {ATTRIBUTES = (Public, ); }; };
+               6E2A3BBF210122300003B53B /* stack_np.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E2A3BBD210122230003B53B /* stack_np.h */; settings = {ATTRIBUTES = (Public, ); }; };
+               6E2A3BC0210122340003B53B /* stack_np.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E2A3BBD210122230003B53B /* stack_np.h */; };
+               6E5869C720C9040A00F1CB75 /* dependency_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5869C620C8FE8300F1CB75 /* dependency_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
+               6E5869C820C9040B00F1CB75 /* dependency_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5869C620C8FE8300F1CB75 /* dependency_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
+               6E5869C920C9040C00F1CB75 /* dependency_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5869C620C8FE8300F1CB75 /* dependency_private.h */; };
+               6E5869CB20C9043200F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869CC20C9043B00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869CD20C9043B00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869CE20C9043C00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869CF20C9043C00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869D020C9043D00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869D120C9043D00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869D220C9043E00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
                6E8C16541B14F08A00C8987C /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EB232C91B0EB29D005915CE /* resolver.c */; };
                6E8C16551B14F08A00C8987C /* pthread.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325FA15B7513200270056 /* pthread.c */; };
                6E8C16561B14F08A00C8987C /* pthread_cancelable.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325F115B7513200270056 /* pthread_cancelable.c */; };
                C9A1BF5015C9A59B006BB313 /* sched.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A3260115B7513700270056 /* sched.h */; settings = {ATTRIBUTES = (Public, ); }; };
                C9A1BF5315C9A9F5006BB313 /* pthread_cancelable_cancel.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A1BF5215C9A9F5006BB313 /* pthread_cancelable_cancel.c */; };
                C9A1BF5515C9CB9D006BB313 /* pthread_cancelable_legacy.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A1BF5415C9CB9D006BB313 /* pthread_cancelable_legacy.c */; };
-               C9A960B0183EB42700AE10C8 /* kern_policy.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A960AF183EB42700AE10C8 /* kern_policy.c */; };
                C9BB478B15E6ABD900F135B7 /* workqueue_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325F915B7513200270056 /* workqueue_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
                C9BB478D15E6ADF700F135B7 /* tsd_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325F415B7513200270056 /* tsd_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
                C9CCFB9D18B6D0910060CAAE /* qos_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C99B17DA189C2E1B00991D38 /* qos_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
                        containerPortal = C9A325D915B7347000270056 /* Project object */;
                        proxyType = 1;
                        remoteGlobalIDString = E4F4498C1E82C1F000A7FB9A;
-                       remoteInfo = "libpthread alt resolved";
+                       remoteInfo = "libpthread armv81 resolved";
                };
 /* End PBXContainerItemProxy section */
 
 /* Begin PBXFileReference section */
+               6E2A3BBD210122230003B53B /* stack_np.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stack_np.h; sourceTree = "<group>"; };
+               6E514A0220B67C0900844EE1 /* offsets.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = offsets.h; sourceTree = "<group>"; };
+               6E5869C620C8FE8300F1CB75 /* dependency_private.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = dependency_private.h; sourceTree = "<group>"; };
+               6E5869CA20C9043200F1CB75 /* pthread_dependency.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = pthread_dependency.c; sourceTree = "<group>"; };
                6E8C16801B14F08A00C8987C /* libsystem_pthread.dylib */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.dylib"; includeInIndex = 0; path = libsystem_pthread.dylib; sourceTree = BUILT_PRODUCTS_DIR; };
                6E8C16851B14F14000C8987C /* pthread_introspection.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = pthread_introspection.xcconfig; sourceTree = "<group>"; };
                6EB232C91B0EB29D005915CE /* resolver.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = resolver.c; sourceTree = "<group>"; };
                C9A3260015B7513700270056 /* pthread_spis.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = pthread_spis.h; sourceTree = "<group>"; };
                C9A3260115B7513700270056 /* sched.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sched.h; sourceTree = "<group>"; };
                C9A3260C15B759B600270056 /* pthread.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = pthread.xcconfig; sourceTree = "<group>"; };
-               C9A960AF183EB42700AE10C8 /* kern_policy.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kern_policy.c; sourceTree = "<group>"; };
                C9A960B318452B2F00AE10C8 /* pthread.py */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.python; path = pthread.py; sourceTree = "<group>"; };
                C9A960B618452CDD00AE10C8 /* install-lldbmacros.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "install-lldbmacros.sh"; sourceTree = "<group>"; };
                C9C2212D15FA978D00447568 /* pthread.aliases */ = {isa = PBXFileReference; lastKnownFileType = text; path = pthread.aliases; sourceTree = "<group>"; };
                E4943AA71E80BD8400D2A961 /* resolver_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = resolver_internal.h; sourceTree = "<group>"; };
                E4D962F919086AD600E8A9F2 /* qos.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = qos.h; sourceTree = "<group>"; };
                E4D962FC19086C5700E8A9F2 /* install-sys-headers.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; path = "install-sys-headers.sh"; sourceTree = "<group>"; };
-               E4F449A01E82C1F000A7FB9A /* libpthread_alt.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libpthread_alt.a; sourceTree = BUILT_PRODUCTS_DIR; };
+               E4F449A01E82C1F000A7FB9A /* libpthread_armv81.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libpthread_armv81.a; sourceTree = BUILT_PRODUCTS_DIR; };
                E4F449A31E82CF0100A7FB9A /* resolver.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = resolver.xcconfig; sourceTree = "<group>"; };
                E4F449D41E82D03500A7FB9A /* libsystem_pthread.dylib */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.dylib"; includeInIndex = 0; path = libsystem_pthread.dylib; sourceTree = BUILT_PRODUCTS_DIR; };
                FC30E28D16A747AD00A25B5F /* synch_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = synch_internal.h; sourceTree = "<group>"; };
                                C9D9E8FE1626248800448CED /* pthread-Info.plist */,
                                C9C533841607C928009988FA /* kern_internal.h */,
                                C9169DDF1603DF9B005A2F8C /* kern_init.c */,
-                               C9A960AF183EB42700AE10C8 /* kern_policy.c */,
                                C9169DDB1603DE84005A2F8C /* kern_synch.c */,
                                C9169DDC1603DE84005A2F8C /* kern_support.c */,
                                C979E9FB18A1BC2A000951E5 /* kern_trace.h */,
                                6E8C16801B14F08A00C8987C /* libsystem_pthread.dylib */,
                                C04545B81C584F4A006A53B3 /* libpthread.a */,
                                E41505E71E818BEB00F243FB /* libpthread_mp.a */,
-                               E4F449A01E82C1F000A7FB9A /* libpthread_alt.a */,
+                               E4F449A01E82C1F000A7FB9A /* libpthread_armv81.a */,
                                E4F449D41E82D03500A7FB9A /* libsystem_pthread.dylib */,
                        );
                        name = Products;
                C9A325ED15B74FB600270056 /* src */ = {
                        isa = PBXGroup;
                        children = (
+                               6E514A0220B67C0900844EE1 /* offsets.h */,
                                C9A325F315B7513200270056 /* internal.h */,
                                C9A325EF15B7513200270056 /* plockstat.d */,
                                C9A325FA15B7513200270056 /* pthread.c */,
                                C9A325F215B7513200270056 /* pthread_cond.c */,
                                924D8EDE1C11832A002AC2BC /* pthread_cwd.c */,
                                C9A325F515B7513200270056 /* pthread_mutex.c */,
+                               6E5869CA20C9043200F1CB75 /* pthread_dependency.c */,
                                C9A325F615B7513200270056 /* pthread_rwlock.c */,
                                C975D5DC15C9D16B0098ECD8 /* pthread_support.c */,
                                C9A325F815B7513200270056 /* pthread_tsd.c */,
                                C9A3260015B7513700270056 /* pthread_spis.h */,
                                C9A3260115B7513700270056 /* sched.h */,
                                C98C95D818FF1F4E005654FB /* spawn.h */,
+                               6E2A3BBD210122230003B53B /* stack_np.h */,
                                C9244C1A185FCFED00075748 /* qos.h */,
                        );
                        path = pthread;
                                E4657D4017284F7B007D1847 /* introspection_private.h */,
                                C99B17DA189C2E1B00991D38 /* qos_private.h */,
                                E4063CF21906B4FB000202F9 /* qos.h */,
+                               6E5869C620C8FE8300F1CB75 /* dependency_private.h */,
                        );
                        path = private;
                        sourceTree = "<group>";
                        isa = PBXHeadersBuildPhase;
                        buildActionMask = 2147483647;
                        files = (
+                               6E2A3BC0210122340003B53B /* stack_np.h in Headers */,
                                6E8C16711B14F08A00C8987C /* posix_sched.h in Headers */,
                                6E8C166F1B14F08A00C8987C /* introspection_private.h in Headers */,
                                E41A64AE1E83C470009479A9 /* introspection.h in Headers */,
                                6E8C166B1B14F08A00C8987C /* pthread_impl.h in Headers */,
                                6E8C166D1B14F08A00C8987C /* pthread_spis.h in Headers */,
                                6E8C166E1B14F08A00C8987C /* sched.h in Headers */,
+                               6E5869C920C9040C00F1CB75 /* dependency_private.h in Headers */,
                                6E8C16751B14F08A00C8987C /* spawn.h in Headers */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                        isa = PBXHeadersBuildPhase;
                        buildActionMask = 2147483647;
                        files = (
+                               6E2A3BBE2101222F0003B53B /* stack_np.h in Headers */,
                                C9244C1B185FD33000075748 /* qos.h in Headers */,
                                C9A1BF4D15C9A58E006BB313 /* pthread.h in Headers */,
                                C9A1BF4E15C9A594006BB313 /* pthread_impl.h in Headers */,
                                C98C95D918FF1F4E005654FB /* spawn.h in Headers */,
                                C99AD87C15DEC5290009A6F8 /* spinlock_private.h in Headers */,
                                C9BB478B15E6ABD900F135B7 /* workqueue_private.h in Headers */,
+                               6E5869C720C9040A00F1CB75 /* dependency_private.h in Headers */,
                                C9153096167ACC2B006BB094 /* private.h in Headers */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                        isa = PBXHeadersBuildPhase;
                        buildActionMask = 2147483647;
                        files = (
+                               6E2A3BBF210122300003B53B /* stack_np.h in Headers */,
                                E4F449BE1E82D03500A7FB9A /* qos.h in Headers */,
                                E4F449BF1E82D03500A7FB9A /* pthread.h in Headers */,
                                E4F449C01E82D03500A7FB9A /* pthread_impl.h in Headers */,
                                E4F449C91E82D03500A7FB9A /* spawn.h in Headers */,
                                E4F449CA1E82D03500A7FB9A /* spinlock_private.h in Headers */,
                                E4F449CB1E82D03500A7FB9A /* workqueue_private.h in Headers */,
+                               6E5869C820C9040B00F1CB75 /* dependency_private.h in Headers */,
                                E4F449CC1E82D03500A7FB9A /* private.h in Headers */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                        productReference = E41505E71E818BEB00F243FB /* libpthread_mp.a */;
                        productType = "com.apple.product-type.library.static";
                };
-               E4F4498C1E82C1F000A7FB9A /* libpthread alt resolved */ = {
+               E4F4498C1E82C1F000A7FB9A /* libpthread armv81 resolved */ = {
                        isa = PBXNativeTarget;
-                       buildConfigurationList = E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread alt resolved" */;
+                       buildConfigurationList = E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread armv81 resolved" */;
                        buildPhases = (
                                E4F4498D1E82C1F000A7FB9A /* Sources */,
                                E4F4499C1E82C1F000A7FB9A /* Symlink normal variant */,
                        );
                        dependencies = (
                        );
-                       name = "libpthread alt resolved";
-                       productName = libpthread_alt.a;
-                       productReference = E4F449A01E82C1F000A7FB9A /* libpthread_alt.a */;
+                       name = "libpthread armv81 resolved";
+                       productName = libpthread_armv81.a;
+                       productReference = E4F449A01E82C1F000A7FB9A /* libpthread_armv81.a */;
                        productType = "com.apple.product-type.library.static";
                };
                E4F449A41E82D03500A7FB9A /* libsystem_pthread noresolver */ = {
                        productType = "com.apple.product-type.library.static";
                };
                E4F449A41E82D03500A7FB9A /* libsystem_pthread noresolver */ = {
                                E4F449A41E82D03500A7FB9A /* libsystem_pthread noresolver */,
                                6E8C16511B14F08A00C8987C /* libsystem_pthread introspection */,
                                E41505D01E818BEB00F243FB /* libpthread mp resolved */,
-                               E4F4498C1E82C1F000A7FB9A /* libpthread alt resolved */,
+                               E4F4498C1E82C1F000A7FB9A /* libpthread armv81 resolved */,
                                C04545A21C584F4A006A53B3 /* libpthread generic */,
                                C90E7A9E15DC3C3800A06D48 /* libpthread dyld */,
                                74E594911613AAF4006C417B /* libpthread eOS */,
                                6E8C16631B14F08A00C8987C /* pthread_support.c in Sources */,
                                6E8C16641B14F08A00C8987C /* thread_setup.c in Sources */,
                                6E8C16651B14F08A00C8987C /* pthread_atfork.c in Sources */,
+                               6E5869CD20C9043B00F1CB75 /* pthread_dependency.c in Sources */,
                                6E8C16661B14F08A00C8987C /* pthread_asm.s in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                                6EB232D01B0EB325005915CE /* resolver.c in Sources */,
                                74E594931613AAF4006C417B /* pthread.c in Sources */,
                                74E594941613AAF4006C417B /* pthread_cancelable.c in Sources */,
+                               6E5869D220C9043E00F1CB75 /* pthread_dependency.c in Sources */,
                                74E594A61613AB10006C417B /* pthread_cancelable_cancel.c in Sources */,
                                74E594951613AAF4006C417B /* pthread_cond.c in Sources */,
                                74E594961613AAF4006C417B /* pthread_mutex.c in Sources */,
                                C04545A41C584F4A006A53B3 /* resolver.c in Sources */,
                                C04545A51C584F4A006A53B3 /* pthread.c in Sources */,
                                C04545A61C584F4A006A53B3 /* pthread_cancelable.c in Sources */,
+                               6E5869D020C9043D00F1CB75 /* pthread_dependency.c in Sources */,
                                C04545A71C584F4A006A53B3 /* pthread_cancelable_cancel.c in Sources */,
                                C04545A81C584F4A006A53B3 /* pthread_cond.c in Sources */,
                                C04545A91C584F4A006A53B3 /* pthread_mutex.c in Sources */,
                                C90E7AA515DC3C9D00A06D48 /* pthread_cancelable.c in Sources */,
                                C90E7AA615DC3C9D00A06D48 /* pthread_cond.c in Sources */,
                                C90E7AA715DC3C9D00A06D48 /* pthread_mutex.c in Sources */,
+                               6E5869D120C9043D00F1CB75 /* pthread_dependency.c in Sources */,
                                C90E7AA815DC3C9D00A06D48 /* pthread_rwlock.c in Sources */,
                                C90E7AA915DC3C9D00A06D48 /* pthread_support.c in Sources */,
                                C90E7AAA15DC3C9D00A06D48 /* pthread_tsd.c in Sources */,
                                C975D5DD15C9D16B0098ECD8 /* pthread_support.c in Sources */,
                                C948FCF715D1D1E100180BF5 /* thread_setup.c in Sources */,
                                C90E7AB815DC40D900A06D48 /* pthread_atfork.c in Sources */,
+                               6E5869CB20C9043200F1CB75 /* pthread_dependency.c in Sources */,
                                C99AD88015E2D8B50009A6F8 /* pthread_asm.s in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                        buildActionMask = 2147483647;
                        files = (
                                C9169DDE1603DE84005A2F8C /* kern_support.c in Sources */,
-                               C9A960B0183EB42700AE10C8 /* kern_policy.c in Sources */,
                                C9169DE01603DF9B005A2F8C /* kern_init.c in Sources */,
                                C9D75E4216127B3900C2FB26 /* kern_synch.c in Sources */,
                        );
                                E41505D21E818BEB00F243FB /* resolver.c in Sources */,
                                E41505D31E818BEB00F243FB /* pthread.c in Sources */,
                                E41505D41E818BEB00F243FB /* pthread_cancelable.c in Sources */,
+                               6E5869CE20C9043C00F1CB75 /* pthread_dependency.c in Sources */,
                                E41505D51E818BEB00F243FB /* pthread_cancelable_cancel.c in Sources */,
                                E41505D61E818BEB00F243FB /* pthread_cond.c in Sources */,
                                E41505D71E818BEB00F243FB /* pthread_mutex.c in Sources */,
                                E4F4498E1E82C1F000A7FB9A /* resolver.c in Sources */,
                                E4F4498F1E82C1F000A7FB9A /* pthread.c in Sources */,
                                E4F449901E82C1F000A7FB9A /* pthread_cancelable.c in Sources */,
+                               6E5869CF20C9043C00F1CB75 /* pthread_dependency.c in Sources */,
                                E4F449911E82C1F000A7FB9A /* pthread_cancelable_cancel.c in Sources */,
                                E4F449921E82C1F000A7FB9A /* pthread_cond.c in Sources */,
                                E4F449931E82C1F000A7FB9A /* pthread_mutex.c in Sources */,
                                E4F449B81E82D03500A7FB9A /* pthread_support.c in Sources */,
                                E4F449B91E82D03500A7FB9A /* thread_setup.c in Sources */,
                                E4F449BA1E82D03500A7FB9A /* pthread_atfork.c in Sources */,
+                               6E5869CC20C9043B00F1CB75 /* pthread_dependency.c in Sources */,
                                E4F449BB1E82D03500A7FB9A /* pthread_asm.s in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                E4F449A21E82C5A400A7FB9A /* PBXTargetDependency */ = {
                        isa = PBXTargetDependency;
-                       target = E4F4498C1E82C1F000A7FB9A /* libpthread alt resolved */;
+                       target = E4F4498C1E82C1F000A7FB9A /* libpthread armv81 resolved */;
                        targetProxy = E4F449A11E82C5A400A7FB9A /* PBXContainerItemProxy */;
                };
 /* End PBXTargetDependency section */
                        isa = XCBuildConfiguration;
                        baseConfigurationReference = E41505E81E818D4D00F243FB /* resolved.xcconfig */;
                        buildSettings = {
-                               RESOLVED_VARIANT = alt;
+                               RESOLVED_VARIANT = armv81;
                        };
                        name = Release;
                };
                        isa = XCBuildConfiguration;
                        baseConfigurationReference = E41505E81E818D4D00F243FB /* resolved.xcconfig */;
                        buildSettings = {
-                               RESOLVED_VARIANT = alt;
+                               RESOLVED_VARIANT = armv81;
                        };
                        name = Debug;
                };
                        defaultConfigurationIsVisible = 0;
                        defaultConfigurationName = Release;
                };
-               E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread alt resolved" */ = {
+               E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread armv81 resolved" */ = {
                        isa = XCConfigurationList;
                        buildConfigurations = (
                                E4F4499E1E82C1F000A7FB9A /* Release */,
diff --git a/lldbmacros/init.py b/lldbmacros/init.py
new file mode 100644 (file)
index 0000000..af7fe69
--- /dev/null
@@ -0,0 +1,310 @@
+from xnu import *
+import struct
+
+def GetSeqCount(seq):
+       return (seq >> 8)
+
+def GetLSeqBits(seq):
+       rv = ""
+       if seq & 0x1:
+               rv += "K"
+       if seq & 0x2:
+               rv += "E"
+       if seq & 0x4:
+               rv += "W"
+       if seq & 0x20:
+               rv += "M"
+       if seq & 0x40:
+               rv += "U"
+       if seq & 0x80:
+               rv += "I"
+       return rv
+
+def GetSSeqBits(seq):
+       rv = ""
+       if seq & 0x1:
+               rv += "S"
+       if seq & 0x2:
+               rv += "I"
+       if seq & 0x4:
+               rv += "Ws"
+       return rv
+
+def GetLSeqSummary(seq):
+       return "{:d} {:s}".format(GetSeqCount(seq), GetLSeqBits(seq))
+
+def GetSSeqSummary(seq):
+       return "{:d} {:s}".format(GetSeqCount(seq), GetSSeqBits(seq))
+
+@header("{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}".format('sig', 'tid', 'options', 'lseq', 'useq'))
+def GetUserMutexSummary(task, uaddr):
+       if int(task.t_flags) & 0x1:
+               mtxlayout = "QIIhhIQIII"
+               padoffset = 1
+       else:
+               mtxlayout = "QIIhhQIII"
+               padoffset = 0
+
+       data = GetUserDataAsString(task, unsigned(uaddr), struct.calcsize(mtxlayout))
+       info = struct.unpack(mtxlayout, data)
+
+       format = "{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}"
+       sigstr = str("{0: <#020x}".format(info[0]))
+
+       # the options field dictates whether we were created misaligned
+       if info[2] & 0x800:
+               lseq = info[7+padoffset]
+               useq = info[8+padoffset]
+       else:
+               lseq = info[6+padoffset]
+               useq = info[7+padoffset]
+
+       return format.format(sigstr, hex(info[5+padoffset]), hex(info[2]), hex(lseq), hex(useq))
+
+@lldb_command('showusermutex')
+def PthreadShowUserMutex(cmd_args=None):
+       """
+       display information about a userspace mutex at a given address
+       Syntax: (lldb) showusermutex <task_t> <uaddr>
+       """
+       if not cmd_args:
+               raise ArgumentError("No arguments passed")
+       task = kern.GetValueFromAddress(cmd_args[0], "task_t")
+       uaddr = kern.GetValueFromAddress(cmd_args[1], "user_addr_t")
+
+       print GetUserMutexSummary.header
+       print GetUserMutexSummary(task, uaddr)
+
+@lldb_type_summary(['ksyn_wait_queue *', 'ksyn_wait_queue_t'])
+@header("{:<20s} {:<20s} {:<10s} {:<6s} {:<6s} {:<8s} {:<8s} {:<8s} {:<8s}".format('kwq', 'uaddr', 'type', 'pflags', 'kflags', 'refs', 'indrop', 'waiters', 'preposts'))
+def GetKwqSummary(kwq):
+       format = "{:<#20x} {:<#20x} {:<10s} {:<6s} {:<6s} {:<8d} {:<8d} {:<8d} {:<8d}\n"
+       kwq = Cast(kwq, "ksyn_wait_queue_t")
+
+       kwqtype = ""
+       if kwq.kw_type & 0xff == 0x01:
+               kwqtype = "mtx"
+       if kwq.kw_type & 0xff == 0x02:
+               kwqtype = "cvar"
+       if kwq.kw_type & 0xff == 0x04:
+               kwqtype = "rwl"
+       if kwq.kw_type & 0xff == 0x05:
+               kwqtype = "sema"
+
+       if kwq.kw_type & 0x1000 == 0x1000:
+               kwqtype += "W" # INWAIT
+       if kwq.kw_type & 0x2000 == 0x2000:
+               kwqtype += "D" # INDROP
+
+       pflags = ""
+       if kwq.kw_pflags & 0x2:
+               pflags += "H" # INHASH
+       if kwq.kw_pflags & 0x4:
+               pflags += "S" # SHARED
+       if kwq.kw_pflags & 0x8:
+               pflags += "W" # WAITING
+       if kwq.kw_pflags & 0x10:
+               pflags += "F" # FREELIST
+
+       kflags = ""
+       if kwq.kw_kflags & 0x1:
+               kflags += "C" # INITCLEARED
+       if kwq.kw_kflags & 0x2:
+               kflags += "Z" # ZEROED
+       if kwq.kw_kflags & 0x4:
+               kflags += "Q" # QOS APPLIED
+       if kwq.kw_kflags & 0x8:
+               kflags += "O" # OVERLAP
+
+       rs = format.format(kwq, kwq.kw_addr, kwqtype, pflags, kflags, kwq.kw_iocount, kwq.kw_dropcount, kwq.kw_inqueue, kwq.kw_fakecount)
+
+       rs += "\t{:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s}\n".format('lowest', 'highest', 'lword', 'uword', 'sword', 'last', 'next')
+       rs += "\t{:<10d} {:<10d} {:<10s} {:<10d} {:<10s} {:<10s} {:<10s}\n".format(
+                       GetSeqCount(kwq.kw_lowseq), GetSeqCount(kwq.kw_highseq),
+                       GetLSeqSummary(kwq.kw_lword), GetSeqCount(kwq.kw_uword),
+                       GetSSeqSummary(kwq.kw_sword), GetSSeqSummary(kwq.kw_lastseqword),
+                       GetSSeqSummary(kwq.kw_nextseqword))
+
+       rs += "\t{:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s}\n".format(
+                       'pposts', 'lseq', 'sseq', 'intr', 'count', 'seq', 'bits')
+
+       intr_type = "NONE"
+       if kwq.kw_intr.type == 0x1:
+               intr_type = "READ"
+       elif kwq.kw_intr.type == 0x2:
+               intr_type = "WRITE"
+
+       rs += "\t{:<10d} {:<10s} {:<10s} {:<10s} {:<10d} {:<10s} {:<10s}\n".format(
+                       kwq.kw_prepost.count,
+                       GetLSeqSummary(kwq.kw_prepost.lseq), GetSSeqSummary(kwq.kw_prepost.sseq),
+                       intr_type, kwq.kw_intr.count,
+                       GetSSeqSummary(kwq.kw_intr.seq), GetSSeqSummary(kwq.kw_intr.returnbits))
+
+       rs += "\twaiting readers:\n"
+       for kwe in IterateTAILQ_HEAD(kwq.kw_ksynqueues[0].ksynq_kwelist, 'kwe_list'):
+               rs += "\t" + GetKweSummary.header + "\n"
+               rs += "\t" + GetKweSummary(kwe) + "\n"
+
+       rs += "\twaiting writers:\n"
+       for kwe in IterateTAILQ_HEAD(kwq.kw_ksynqueues[1].ksynq_kwelist, 'kwe_list'):
+               rs += "\t" + GetKweSummary.header + "\n"
+               rs += "\t" + GetKweSummary(kwe) + "\n"
+
+       if kwq.kw_turnstile:
+               rs += GetTurnstileSummary.header + "\n"
+               rs += GetTurnstileSummary(Cast(kwq.kw_turnstile, "struct turnstile *"))
+
+       return rs
+
+@lldb_type_summary(['ksyn_waitq_element *', 'ksyn_waitq_element_t'])
+@header("{:<20s} {:<20s} {:<10s} {:<10s} {:<20s} {:<20s}".format('kwe', 'kwq', 'lseq', 'state', 'uthread', 'thread'))
+def GetKweSummary(kwe):
+       format = "{:<#20x} {:<#20x} {:<10s} {:<10s} {:<#20x} {:<#20x}"
+       kwe = Cast(kwe, 'struct ksyn_waitq_element *')
+       state = ""
+       if kwe.kwe_state == 1:
+               state = "INWAIT"
+       elif kwe.kwe_state == 2:
+               state = "PPOST"
+       elif kwe.kwe_state == 3:
+               state = "BROAD"
+       else:
+               state = "{:#10x}".format(kwe.kwe_state)
+       return format.format(kwe, kwe.kwe_kwqqueue, GetLSeqSummary(kwe.kwe_lockseq), state, kwe.kwe_uth, kwe.kwe_thread)
+
+@header("{0: <24s} {1: <24s} {2: <24s}".format('thread', 'thread_id', 'uthread'))
+def GetPthreadSummary(thread):
+       format = "{0: <24s} {1: <24s} {2: <24s}"
+
+       threadstr = str("{0: <#020x}".format(thread))
+       if int(thread.static_param):
+               threadstr += "[WQ]"
+
+       uthread = Cast(thread.uthread, "uthread_t")
+       uthreadstr = str("{0: <#020x}".format(uthread))
+
+
+       return format.format(threadstr, hex(thread.thread_id), uthreadstr)
+
+@header("{0: <24s} {1: <24s} {2: <10s} {3: <10s} {4: <10s} {5: <10s} {6: <10s}".format('proc', 'wq', 'sched', 'req', 'idle', 'wq_flags', 'wq_lflags'))
+def GetPthreadWorkqueueSummary(wq):
+       format = "{0: <24s} {1: <24s} {2: <10d} {3: <10d} {4: <10d} {5: <10s} {6: <10s}"
+       procstr = str("{0: <#020x}".format(wq.wq_proc))
+       wqstr = str("{0: <#020x}".format(wq))
+       
+       flags = []
+       if wq.wq_flags & 0x1:
+               flags.append("I")
+       if wq.wq_flags & 0x2:
+               flags.append("R")
+       if wq.wq_flags & 0x4:
+               flags.append("E")
+               
+       wqflags = []
+       if wq.wq_lflags & 0x1:
+               wqflags.append("B")
+       if wq.wq_lflags & 0x2:
+               wqflags.append("W")
+       if wq.wq_lflags & 0x4:
+               wqflags.append("C")
+       if wq.wq_lflags & 0x8:
+               wqflags.append("L")
+       
+       return format.format(procstr, wqstr, wq.wq_threads_scheduled, wq.wq_reqcount, wq.wq_thidlecount, "".join(flags), "".join(wqflags))
+
+@header("{0: <24s} {1: <5s} {2: <5s} {3: <5s} {4: <5s} {5: <5s} {6: <5s} {7: <5s}".format('category', 'uint', 'uinit', 'lgcy', 'util', 'bckgd', 'maint', 'event'))
+def GetPthreadWorkqueueDetail(wq):
+       format = "  {0: <22s} {1: <5d} {2: <5d} {3: <5d} {4: <5d} {5: <5d} {6: <5d} {7: <5d}"
+       # requests
+       schedstr = format.format('scheduled', wq.wq_thscheduled_count[0], wq.wq_thscheduled_count[1], wq.wq_thscheduled_count[2], wq.wq_thscheduled_count[3], wq.wq_thscheduled_count[4], wq.wq_thscheduled_count[5], wq.wq_thscheduled_count[6])
+       activestr = format.format('active', wq.wq_thactive_count[0], wq.wq_thactive_count[1], wq.wq_thactive_count[2], wq.wq_thactive_count[3], wq.wq_thactive_count[4], wq.wq_thactive_count[5], wq.wq_thactive_count[6])
+       return "\n".join([schedstr, activestr])
+
+@lldb_command('showthreadpsynch')
+def PthreadCurrentMutex(cmd_args=None):
+       """
+       display information about a thread's pthread state
+	Syntax: (lldb) showthreadpsynch <thread_t>
+       """
+       if not cmd_args:
+               raise ArgumentError("No arguments passed")
+
+       thread = kern.GetValueFromAddress(cmd_args[0], "thread_t")
+       print GetPthreadSummary.header
+       print GetPthreadSummary(thread)
+
+       uthread = Cast(thread.uthread, "uthread_t")
+       kwe = Cast(addressof(uthread.uu_save.uus_kwe), 'struct ksyn_waitq_element *')
+       if not kwe or not kwe.kwe_kwqqueue:
+               print GetKweSummary.header
+               print GetKweSummary(kwe)
+       else:
+               print GetKwqSummary.header
+               print GetKwqSummary(kwe.kwe_kwqqueue)
+
+@lldb_command('showpthreadkwq')
+def PthreadShowKsynQueue(cmd_args=None):
+       """
+       display information about a pthread ksyn_wait_queue_t
+       Syntax: (lldb) showpthreadkwq <ksyn_wait_queue_t>
+       """
+       if not cmd_args:
+               raise ArgumentError("No arguments passed")
+
+       kwq = kern.GetValueFromAddress(cmd_args[0], "ksyn_wait_queue_t")
+       print GetKwqSummary.header
+       print GetKwqSummary(kwq)
+
+@lldb_command('showpthreadkwe')
+def PthreadShowKsynElement(cmd_args=None):
+       """
+       display information about a thread's ksyn_waitq_element
+       Syntax: (lldb) showpthreadkwe <ksyn_waitq_element_t>    
+       """
+       if not cmd_args:
+               raise ArgumentError("No arguments passed")
+
+       kwe = kern.GetValueFromAddress(cmd_args[0], "struct ksyn_waitq_element *")
+       print GetKweSummary.header
+       print GetKweSummary(kwe)
+
+@lldb_command('showpthreadworkqueue')
+def ShowPthreadWorkqueue(cmd_args=None):
+       """
+	display information about a process's pthread workqueue
+       Syntax: (lldb) showpthreadworkqueue <proc_t>
+       """
+       
+       if not cmd_args:
+               raise ArgumentError("No arguments passed")
+               
+       proc = kern.GetValueFromAddress(cmd_args[0], "proc_t")
+       wq = Cast(proc.p_wqptr, "struct workqueue *");
+       
+       print GetPthreadWorkqueueSummary.header
+       print GetPthreadWorkqueueSummary(wq)
+       
+       print GetPthreadWorkqueueDetail.header
+       print GetPthreadWorkqueueDetail(wq)
+
+def IterateTAILQ_HEAD(headval, element_name):
+    """ iterate over a TAILQ_HEAD in kernel. refer to bsd/sys/queue.h
+        params:
+            headval     - value : value object representing the head of the list
+            element_name- str          :  string name of the field which holds the list links.
+        returns:
+            A generator does not return. It is used for iterating.
+            value : an object that is of type as headval->tqh_first. Always a pointer object
+        example usage:
+          list_head = kern.GetGlobalVariable('mountlist')
+          for entryobj in IterateTAILQ_HEAD(list_head, 'mnt_list'):
+            print GetEntrySummary(entryobj)
+    """
+    iter_val = headval.tqh_first
+    while unsigned(iter_val) != 0 :
+        yield iter_val
+        iter_val = iter_val.__getattr__(element_name).tqe_next
+    #end of yield loop
+
+def __lldb_init_module(debugger, internal_dict):
+       pass
diff --git a/lldbmacros/pthread.py b/lldbmacros/pthread.py
deleted file mode 100644 (file)
index a24779c..0000000
+++ /dev/null
@@ -1,152 +0,0 @@
-from xnu import *
-import struct
-
-@header("{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}".format('sig', 'tid', 'options', 'lseq', 'useq'))
-def GetUserMutexSummary(task, uaddr):
-       if int(task.t_flags) & 0x1:
-               mtxlayout = "QIIhhIQIII"
-               padoffset = 1
-       else:
-               mtxlayout = "QIIhhQIII"
-               padoffset = 0
-
-       data = GetUserDataAsString(task, uaddr, struct.calcsize(mtxlayout))
-       info = struct.unpack(mtxlayout, data)
-
-       format = "{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}"
-       sigstr = str("{0: <#020x}".format(info[0]))
-
-       # the options field dictates whether we were created misaligned
-       if info[2] & 0x800:
-               lseq = info[7+padoffset]
-               useq = info[8+padoffset]
-       else:
-               lseq = info[6+padoffset]
-               useq = info[7+padoffset]
-
-       return format.format(sigstr, hex(info[5+padoffset]), hex(info[2]), hex(lseq), hex(useq))
-
-@lldb_command('showusermutex')
-def PthreadShowUserMutex(cmd_args=None):
-       """
-       display information about a userspace mutex at a given address
-       Syntax: (lldb) showusermutex <task_t> <uaddr>
-       """
-       if not cmd_args:
-               raise ArgumentError("No arguments passed")
-       task = kern.GetValueFromAddress(cmd_args[0], "task_t")
-       uaddr = kern.GetValueFromAddress(cmd_args[1], "user_addr_t")
-
-       print GetUserMutexSummary.header
-       print GetUserMutexSummary(task, uaddr)
-
-@lldb_type_summary(['ksyn_waitq_element *', 'ksyn_waitq_element_t'])
-@header("{0: <24s} {1: <24s} {2: <24s} {3: <10s}".format('kwe', 'kwq', 'uaddr', 'type'))
-def GetKweSummary(kwe):
-       format = "{0: <24s} {1: <24s} {2: <24s} {3: <10s}"
-       kwe = Cast(addressof(kwe), "ksyn_waitq_element_t")
-       kwestr = str("{0: <#020x}".format(kwe))
-
-       kwq = Cast(kwe.kwe_kwqqueue, "ksyn_wait_queue_t")
-       kwqstr = str("{0: <#020x}".format(kwq))
-       uaddrstr = str("{0: <#020x}".format(kwq.kw_addr))
-
-       kwqtype = ""
-       if kwq.kw_type & 0xff == 0x01:
-               kwqtype = "mtx"
-       if kwq.kw_type & 0xff == 0x02:
-               kwqtype = "cvar"
-       if kwq.kw_type & 0xff == 0x04:
-               kwqtype = "rwlock"
-       if kwq.kw_type & 0xff == 0x05:
-               kwqtype = "sema"
-
-       return format.format(kwestr, kwqstr, uaddrstr, kwqtype)
-
-@header("{0: <24s} {1: <24s} {2: <24s}".format('thread', 'thread_id', 'uthread'))
-def GetPthreadSummary(thread):
-       format = "{0: <24s} {1: <24s} {2: <24s}"
-
-       threadstr = str("{0: <#020x}".format(thread))
-       if int(thread.static_param):
-               threadstr += "[WQ]"
-
-       uthread = Cast(thread.uthread, "uthread_t")
-       uthreadstr = str("{0: <#020x}".format(uthread))
-
-
-       return format.format(threadstr, hex(thread.thread_id), uthreadstr)
-
-@header("{0: <24s} {1: <24s} {2: <10s} {3: <10s} {4: <10s} {5: <10s} {6: <10s}".format('proc', 'wq', 'sched', 'req', 'idle', 'wq_flags', 'wq_lflags'))
-def GetPthreadWorkqueueSummary(wq):
-       format = "{0: <24s} {1: <24s} {2: <10d} {3: <10d} {4: <10d} {5: <10s} {6: <10s}"
-       procstr = str("{0: <#020x}".format(wq.wq_proc))
-       wqstr = str("{0: <#020x}".format(wq))
-       
-       flags = []
-       if wq.wq_flags & 0x1:
-               flags.append("I")
-       if wq.wq_flags & 0x2:
-               flags.append("R")
-       if wq.wq_flags & 0x4:
-               flags.append("E")
-               
-       wqflags = []
-       if wq.wq_lflags & 0x1:
-               wqflags.append("B")
-       if wq.wq_lflags & 0x2:
-               wqflags.append("W")
-       if wq.wq_lflags & 0x4:
-               wqflags.append("C")
-       if wq.wq_lflags & 0x8:
-               wqflags.append("L")
-       
-       return format.format(procstr, wqstr, wq.wq_threads_scheduled, wq.wq_reqcount, wq.wq_thidlecount, "".join(flags), "".join(wqflags))
-
-@header("{0: <24s} {1: <5s} {2: <5s} {3: <5s} {4: <5s} {5: <5s} {6: <5s} {7: <5s}".format('category', 'uint', 'uinit', 'lgcy', 'util', 'bckgd', 'maint', 'event'))
-def GetPthreadWorkqueueDetail(wq):
-       format = "  {0: <22s} {1: <5d} {2: <5d} {3: <5d} {4: <5d} {5: <5d} {6: <5d} {7: <5d}"
-       # requests
-       schedstr = format.format('scheduled', wq.wq_thscheduled_count[0], wq.wq_thscheduled_count[1], wq.wq_thscheduled_count[2], wq.wq_thscheduled_count[3], wq.wq_thscheduled_count[4], wq.wq_thscheduled_count[5], wq.wq_thscheduled_count[6])
-       activestr = format.format('active', wq.wq_thactive_count[0], wq.wq_thactive_count[1], wq.wq_thactive_count[2], wq.wq_thactive_count[3], wq.wq_thactive_count[4], wq.wq_thactive_count[5], wq.wq_thactive_count[6])
-       return "\n".join([schedstr, activestr])
-
-@lldb_command('showpthreadstate')
-def PthreadCurrentMutex(cmd_args=None):
-       """
-       display information about a thread's pthread state
-       Syntax: (lldb) showpthreadstate <thread_t>
-       """
-       if not cmd_args:
-               raise ArgumentError("No arguments passed")
-
-       thread = kern.GetValueFromAddress(cmd_args[0], "thread_t")
-       print GetPthreadSummary.header
-       print GetPthreadSummary(thread)
-
-       uthread = Cast(thread.uthread, "uthread_t")
-       kwe = addressof(uthread.uu_kevent.uu_kwe)
-       print GetKweSummary.header
-       print GetKweSummary(kwe)
-
-@lldb_command('showpthreadworkqueue')
-def ShowPthreadWorkqueue(cmd_args=None):
-       """
-       display information about a processes' pthread workqueue
-       Syntax: (lldb) showpthreadworkqueue <proc_t>
-       """
-       
-       if not cmd_args:
-               raise ArgumentError("No arguments passed")
-               
-       proc = kern.GetValueFromAddress(cmd_args[0], "proc_t")
-       wq = Cast(proc.p_wqptr, "struct workqueue *");
-       
-       print GetPthreadWorkqueueSummary.header
-       print GetPthreadWorkqueueSummary(wq)
-       
-       print GetPthreadWorkqueueDetail.header
-       print GetPthreadWorkqueueDetail(wq)
-
-def __lldb_init_module(debugger, internal_dict):
-       pass
index 13e0861e834db636571c007cada52badbc9512dd..756c4079206528fcbcd42e01c17e72f53e3fa6d3 100644 (file)
 .Fn pthread_mutexattr_settype "pthread_mutexattr_t *attr" "int type"
 .Ft int
 .Fn pthread_mutexattr_gettype "pthread_mutexattr_t *attr" "int *type"
+.Ft int
+.Fn pthread_mutexattr_setpolicy_np "pthread_mutexattr_t *attr" "int policy"
+.Ft int
+.Fn pthread_mutexattr_getpolicy_np "pthread_mutexattr_t *attr" "int *policy"
 .Sh DESCRIPTION
 Mutex attributes are used to specify parameters to
 .Fn pthread_mutex_init .
@@ -164,6 +168,31 @@ This is the default mutex type for
 functions copy the type value of the attribute to the location pointed to by the second parameter.
 .Pp
 The
+.Fn pthread_mutexattr_setpolicy_np
+function sets the mutex
+.Fa policy
+value of the attribute.
+Valid mutex policies are:
+.Bl -tag -width "XXX" -offset 2n
+.It Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP
+The first-fit mutex policy allows acquisition of the mutex to occur in any
+order. This policy is similar in operation to os_unfair_lock: new contending
+acquirers may obtain ownership of the mutex ahead of existing waiters.
+.It Dv PTHREAD_MUTEX_POLICY_FAIRSHARE_NP
+The fairshare mutex policy guarantees that ownership of a contended mutex will
+be granted to waiters on a strictly ordered first-in, first-out basis. That is,
+a mutex holder that unlocks the mutex and then attempts to relock will wait
+behind existing threads already waiting on the mutex before being granted
+ownership again.
+.El
+.Pp
+The
+.Fn pthread_mutexattr_getpolicy_np
+function copies the mutex
+.Fa policy
+value of the attribute to the location pointed to by the second parameter.
+.Pp
+The
 .Fn pthread_mutexattr_set*
 functions set the attribute that corresponds to each function name.
 .Pp
@@ -174,6 +203,39 @@ to the location pointed to by the second function parameter.
 .Sh RETURN VALUES
 If successful, these functions return 0.
 Otherwise, an error number is returned to indicate the error.
+.Sh ENVIRONMENT
+The following environment variables change the behavior of the pthread mutex
+implementation.
+.Bl -tag -width "XXX" -offset 2n
+.It Ev PTHREAD_MUTEX_DEFAULT_POLICY
+Controls the process-wide policy used when initializing a pthread_mutex_t that
+has not had a policy set via
+.Fn pthread_mutexattr_setpolicy_np .
+The valid values are mapped as:
+.Pp
+.Bl -tag -width "XXX"
+.It Fa 1
+.Dv PTHREAD_MUTEX_POLICY_FAIRSHARE_NP
+.It Fa 3
+.Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP
+.El
+.El
+.Sh BACKWARDS COMPATIBILITY
+Prior to macOS 10.14 (iOS and tvOS 12.0, watchOS 5.0), the only available
+pthread mutex policy mode was
+.Dv PTHREAD_MUTEX_POLICY_FAIRSHARE_NP .
+macOS 10.14 (iOS and tvOS 12.0, watchOS 5.0) introduces
+.Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP
+and also makes this the default mode for mutexes initialized without a policy
+attribute set.
+.Pp
+Attempting to use
+.Fn pthread_mutexattr_setpolicy_np
+to set the policy of a pthread_mutex_t to
+.Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP
+on earlier releases will fail with
+.Er EINVAL
+and the mutex will continue to operate in fairshare mode.
 .Sh ERRORS
 The
 .Fn pthread_mutexattr_init
@@ -252,6 +314,27 @@ function will fail if:
 Invalid value for
 .Fa attr .
 .El
+.Pp
+The
+.Fn pthread_mutexattr_setpolicy_np
+function will fail if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Invalid value for
+.Fa attr .
+.El
+.Pp
+The
+.Fn pthread_mutexattr_getpolicy_np
+function will fail if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The value specified either by
+.Fa type
+or
+.Fa attr
+is invalid.
+.El
 .Sh SEE ALSO
 .Xr pthread_mutex_init 3
 .Sh STANDARDS
diff --git a/private/dependency_private.h b/private/dependency_private.h
new file mode 100644 (file)
index 0000000..77d209f
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef __PTHREAD_DEPENDENCY_PRIVATE__
+#define __PTHREAD_DEPENDENCY_PRIVATE__
+
+#include <os/base.h>
+#include <sys/cdefs.h>
+#include <pthread/pthread.h>
+#include <Availability.h>
+
+__BEGIN_DECLS
+
+OS_ASSUME_NONNULL_BEGIN
+
+/*!
+ * @typedef pthread_dependency_t
+ *
+ * @abstract
+ * A pthread dependency is a one-time dependency between a thread producing
+ * a value and a waiter thread, expressed to the system in a way
+ * that priority inversion avoidance can be applied if necessary.
+ *
+ * @discussion
+ * These tokens are one-time use, and meant to be on the stack of the waiter
+ * thread.
+ *
+ * These tokens must be both fulfilled and waited on, exactly one of each.
+ */
+typedef struct pthread_dependency_s {
+       uint32_t __pdep_owner;
+       uint32_t __pdep_opaque1;
+       uint64_t __pdep_opaque2;
+} pthread_dependency_t;
+
+/*!
+ * @typedef pthread_dependency_attr_t
+ *
+ * @abstract
+ * An opaque type to allow for future expansion of the pthread_dependency
+ * interface.
+ */
+typedef struct pthread_dependency_attr_s pthread_dependency_attr_t;
+
+#if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)) || defined(_DARWIN_C_SOURCE) || defined(__cplusplus)
+/*!
+ * @macro PTHREAD_DEPENDENCY_INITIALIZER_NP
+ *
+ * @abstract
+ * Initialize a one-time dependency token.
+ *
+ * @param __pthread
+ * The thread that will be waited on for this dependency to be fulfilled.
+ * It is expected that this thread will call pthread_dependency_fulfill_np().
+ */
+#define PTHREAD_DEPENDENCY_INITIALIZER_NP(__pthread) \
+               { pthread_mach_thread_np(__pthread), 0, 0 }
+#endif
+
+/*!
+ * @function pthread_dependency_init_np
+ *
+ * @abstract
+ * Initialize a dependency token.
+ *
+ * @param __dependency
+ * A pointer to a dependency token to initialize.
+ *
+ * @param __pthread
+ * The thread that will be waited on for this dependency to be fulfilled.
+ * It is expected that this thread will call pthread_dependency_fulfill_np().
+ *
+ * @param __attrs
+ * This argument is reserved for future expansion purposes, and NULL should be
+ * passed.
+ */
+__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+OS_NONNULL1 OS_NONNULL2 OS_NOTHROW
+void pthread_dependency_init_np(pthread_dependency_t *__dependency,
+               pthread_t __pthread, pthread_dependency_attr_t *_Nullable __attrs);
+
+/*!
+ * @function pthread_dependency_fulfill_np
+ *
+ * @abstract
+ * Fulfill a dependency.
+ *
+ * @discussion
+ * Calling pthread_dependency_fulfill_np() with a token that hasn't been
+ * initialized yet, or calling pthread_dependency_fulfill_np() on the same
+ * dependency token more than once is undefined and will cause the process
+ * to be terminated.
+ *
+ * The thread that calls pthread_dependency_fulfill_np() must be the same
+ * as the pthread_t that was specified when initializing the token. Not doing so
+ * is undefined and will cause the process to be terminated.
+ *
+ * @param __dependency
+ * A pointer to a dependency token that was previously initialized.
+ *
+ * @param __value
+ * An optional value that can be returned through the dependency token
+ * to the waiter.
+ */
+__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+OS_NONNULL1 OS_NOTHROW
+void pthread_dependency_fulfill_np(pthread_dependency_t *__dependency,
+               void * _Nullable __value);
+
+/*!
+ * @function pthread_dependency_wait_np
+ *
+ * @abstract
+ * Wait on a dependency.
+ *
+ * @discussion
+ * Calling pthread_dependency_wait_np() with a token that hasn't been
+ * initialized yet, or calling pthread_dependency_wait_np() on the same
+ * dependency token more than once is undefined and will cause the process
+ * to be terminated.
+ *
+ * If the dependency is not fulfilled yet when this function is called, priority
+ * inversion avoidance will be applied to the thread that was specified when
+ * initializing the token, to ensure that it can call
+ * pthread_dependency_fulfill_np() without causing a priority inversion for the
+ * thread calling pthread_dependency_wait_np().
+ *
+ * @param __dependency
+ * A pointer to a dependency token that was previously initialized with
+ * PTHREAD_DEPENDENCY_INITIALIZER_NP() or pthread_dependency_init_np().
+ *
+ * @returns
+ * The value that was passed to pthread_dependency_fulfill_np() as the `__value`
+ * argument.
+ */
+__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+OS_NONNULL1 OS_NOTHROW
+void *_Nullable pthread_dependency_wait_np(pthread_dependency_t *__dependency);
+
+OS_ASSUME_NONNULL_END
+
+__END_DECLS
+
+#endif //__PTHREAD_DEPENDENCY_PRIVATE__
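
A minimal sketch of the intended one-shot usage of this new SPI. The include
path is assumed from the private/ header location, and for brevity the token
lives on the creating thread's stack rather than the waiter's (the header
describes the waiter's stack as the expected convention, not a requirement):

	#include <pthread.h>
	#include <stdio.h>
	#include <pthread/dependency_private.h> /* SPI added by this commit */

	static void *
	waiter(void *arg)
	{
		pthread_dependency_t *dep = arg;
		/* Blocks until the thread named in the token fulfills it; if
		 * that thread runs at a lower priority, priority inversion
		 * avoidance boosts it, per the discussion above. */
		void *value = pthread_dependency_wait_np(dep);
		printf("received %p\n", value);
		return NULL;
	}

	int main(void)
	{
		/* One-time token: main is the thread promising to fulfill it,
		 * and it is fully initialized before the waiter can see it. */
		pthread_dependency_t dep =
				PTHREAD_DEPENDENCY_INITIALIZER_NP(pthread_self());
		pthread_t t;

		pthread_create(&t, NULL, waiter, &dep);
		/* ... produce the value ... */
		pthread_dependency_fulfill_np(&dep, (void *)0x2a);
		pthread_join(t, NULL);
		return 0;
	}

Exactly one fulfill and one wait are issued per token, matching the
requirements spelled out in the header comments above.
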
index b98a350e0f28f0aa3f2894242634c5202cec5c99..b321442be2f0e9fe0f65c0a3d8c12ab6b6e32762 100644 (file)
@@ -93,6 +93,8 @@ int pthread_chdir_np(char *path);
 __API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
 int pthread_fchdir_np(int fd);
 
+__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+int pthread_attr_setcpupercent_np(pthread_attr_t * __restrict, int, unsigned long);
 
 #ifdef _os_tsd_get_base
 
@@ -107,17 +109,17 @@ __header_always_inline uint64_t
 _pthread_threadid_self_np_direct(void)
 {
 #ifndef __i386__
-    if (_pthread_has_direct_tsd()) {
+       if (_pthread_has_direct_tsd()) {
 #ifdef OS_GS_RELATIVE
-        return *(uint64_t OS_GS_RELATIVE *)(_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET);
+               return *(uint64_t OS_GS_RELATIVE *)(_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET);
 #else
-        return *(uint64_t*)((char *)_os_tsd_get_base() + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET);
+               return *(uint64_t*)((char *)_os_tsd_get_base() + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET);
 #endif
-    }
+       }
 #endif
-    uint64_t threadid = 0;
-    pthread_threadid_np(NULL, &threadid);
-    return threadid;
+       uint64_t threadid = 0;
+       pthread_threadid_np(NULL, &threadid);
+       return threadid;
 }
 
 #endif // _os_tsd_get_base
index 50f273a0de43877198a686bbb2c5d20002003020..6068a822c068662ccbf438a6d77a81a1145e9821 100644 (file)
@@ -25,6 +25,7 @@
 #define _QOS_PRIVATE_H
 
 #include <pthread/qos.h>
+#include <pthread/priority_private.h>
 #include <sys/qos.h> /* qos_class_t */
 #include <sys/qos_private.h>
 
 #include <mach/port.h>
 #endif
 
-// pthread_priority_t is an on opaque integer that is guaranteed to be ordered such that
-// combations of QoS classes and relative priorities are ordered numerically, according to
-// their combined priority.
-typedef unsigned long pthread_priority_t;
-
-// masks for splitting the handling the contents of a pthread_priority_t, the mapping from
-// qos_class_t to the class bits, however, is intentionally not exposed.
-#define _PTHREAD_PRIORITY_FLAGS_MASK                   0xff000000
-#define _PTHREAD_PRIORITY_FLAGS_SHIFT                  (24ull)
-#define _PTHREAD_PRIORITY_ENCODING_MASK                        0x00a00000
-#define _PTHREAD_PRIORITY_ENCODING_SHIFT               (22ull)
-#define _PTHREAD_PRIORITY_ENCODING_V0                  0x00000000
-#define _PTHREAD_PRIORITY_ENCODING_V1                  0x00400000 /* unused */
-#define _PTHREAD_PRIORITY_ENCODING_V2                  0x00800000 /* unused */
-#define _PTHREAD_PRIORITY_ENCODING_V3                  0x00a00000 /* unused */
-#define _PTHREAD_PRIORITY_QOS_CLASS_MASK               0x003fff00
-#define _PTHREAD_PRIORITY_QOS_CLASS_SHIFT              (8ull)
-#define _PTHREAD_PRIORITY_PRIORITY_MASK                        0x000000ff
-#define _PTHREAD_PRIORITY_PRIORITY_SHIFT               (0)
-
-#define _PTHREAD_PRIORITY_OVERCOMMIT_FLAG              0x80000000
-#define _PTHREAD_PRIORITY_INHERIT_FLAG                 0x40000000
-#define _PTHREAD_PRIORITY_ROOTQUEUE_FLAG               0x20000000
-// Used to indicate to the pthread kext that the provided event manager thread
-// priority is actually a scheduling priority not a QoS.  We can have ROOTQUEUE_FLAG
-// perform double duty because it's never provided to the kernel.
-#define _PTHREAD_PRIORITY_SCHED_PRI_FLAG               0x20000000
-#define _PTHREAD_PRIORITY_SCHED_PRI_MASK               0x0000ffff
-#define _PTHREAD_PRIORITY_ENFORCE_FLAG                 0x10000000
-#define _PTHREAD_PRIORITY_OVERRIDE_FLAG                        0x08000000
-
-// libdispatch defines the following, so it's not safe to use for anything we
-// expect to be passed in from userspace
-#define _PTHREAD_PRIORITY_DEFAULTQUEUE_FLAG            0x04000000
-
-// The event manager flag indicates that this thread/request is for a event
-// manager thread.  There can only ever be one event manager thread at a time and
-// it is brought up at the highest of all event manager priorities passed to the
-// kext.
-#define _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG   0x02000000
-#define _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG            0x01000000
-
 // redeffed here to avoid leaving __QOS_ENUM defined in the public header
 #define __QOS_ENUM(name, type, ...) enum { __VA_ARGS__ }; typedef type name##_t
 #define __QOS_AVAILABLE_10_10
index f91c1f6cf206875ceffa419853a2f82c2490c05e..f9260fb01b9f697e1bafcea12713c5dfb9a46f87 100644 (file)
 #define __TSD_RETURN_TO_KERNEL 5
 #endif
 
+#ifndef __TSD_PTR_MUNGE
+#define __TSD_PTR_MUNGE 7
+#endif
+
 #ifndef __TSD_MACH_SPECIAL_REPLY
 #define __TSD_MACH_SPECIAL_REPLY 8
 #endif
@@ -81,6 +85,7 @@
 #define _PTHREAD_TSD_SLOT_MACH_THREAD_SELF __TSD_MACH_THREAD_SELF
 #define _PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS    __TSD_THREAD_QOS_CLASS
 #define _PTHREAD_TSD_SLOT_RETURN_TO_KERNEL __TSD_RETURN_TO_KERNEL
+#define _PTHREAD_TSD_SLOT_PTR_MUNGE __TSD_PTR_MUNGE
 #define _PTHREAD_TSD_SLOT_MACH_SPECIAL_REPLY __TSD_MACH_SPECIAL_REPLY
 //#define _PTHREAD_TSD_SLOT_SEMAPHORE_CACHE __TSD_SEMAPHORE_CACHE
 
index 0b0a001e9bbeb51de9f45492b4f0ecc984f943a6..9cd0e951d6df271b6fbb472a48d552a7b3572436 100644 (file)
@@ -179,6 +179,14 @@ __API_AVAILABLE(macos(10.10.2))
 int
 _pthread_workqueue_asynchronous_override_reset_all_self(void);
 
+__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+int
+_pthread_workloop_create(uint64_t workloop_id, uint64_t options, pthread_attr_t *attr);
+
+__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+int
+_pthread_workloop_destroy(uint64_t workloop_id);
+
 __END_DECLS
 
 #endif // __PTHREAD_WORKQUEUE_H__
index 18292085d5c1ce4c7a5611c338eeffd7ec86acbf..10b719a3da20ce9fc4f51a014fa243287e2a2609 100644 (file)
@@ -64,18 +64,40 @@ typedef void (*pthread_introspection_hook_t)(unsigned int event,
 
 /*!
  * @enum pthread_introspection_event_t
+ * Events sent by libpthread about thread lifetimes.
  *
- * @constant PTHREAD_INTROSPECTION_THREAD_CREATE
- * pthread_t was created.
+ * @const PTHREAD_INTROSPECTION_THREAD_CREATE
+ * The specified pthread_t was created, and there will be a paired
+ * PTHREAD_INTROSPECTION_THREAD_DESTROY event. However, there may not be
+ * a START/TERMINATE pair of events for this pthread_t.
  *
- * @constant PTHREAD_INTROSPECTION_THREAD_START
- * Thread has started and stack was allocated.
+ * Starting with macOS 10.14 and iOS 12, this event is always sent before
+ * PTHREAD_INTROSPECTION_THREAD_START is sent. However, this event is not
+ * sent for the main thread.
  *
- * @constant PTHREAD_INTROSPECTION_THREAD_TERMINATE
- * Thread is about to be terminated and stack will be deallocated.
+ * This event may not be sent from the context of the passed in pthread_t.
  *
- * @constant PTHREAD_INTROSPECTION_THREAD_DESTROY
- * pthread_t is about to be destroyed.
+ * Note that not all properties of this thread may be functional yet, and it
+ * is not permitted to call functions on this thread beyond observing its
+ * address.
+ *
+ * @const PTHREAD_INTROSPECTION_THREAD_START
+ * Thread has started and its stack was allocated. There will be a matching
+ * PTHREAD_INTROSPECTION_THREAD_TERMINATE event.
+ *
+ * This event is always sent from the context of the passed in pthread_t.
+ *
+ * @const PTHREAD_INTROSPECTION_THREAD_TERMINATE
+ * Thread is about to be terminated and stack will be deallocated. This always
+ * matches a PTHREAD_INTROSPECTION_THREAD_START event.
+ *
+ * This event is always sent from the context of the passed in pthread_t.
+ *
+ * @const PTHREAD_INTROSPECTION_THREAD_DESTROY
+ * pthread_t is about to be destroyed. This always matches
+ * a PTHREAD_INTROSPECTION_THREAD_CREATE event, but there may not have been
+ * a START/TERMINATE pair of events for this pthread_t.
+ *
+ * This event may not be sent from the context of the passed in pthread_t.
  */
 enum {
        PTHREAD_INTROSPECTION_THREAD_CREATE = 1,
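
A short sketch of a hook honoring the guarantees spelled out above: only
START/TERMINATE are delivered on the affected thread itself, and the hook
previously returned by pthread_introspection_hook_install() should be chained
so other observers keep working:

	#include <pthread.h>
	#include <pthread/introspection.h>
	#include <stdio.h>

	static pthread_introspection_hook_t prev_hook;

	static void
	my_hook(unsigned int event, pthread_t thread, void *addr, size_t size)
	{
		/* Only log START: it is always delivered on `thread` itself,
		 * and the thread is fully functional at that point. */
		if (event == PTHREAD_INTROSPECTION_THREAD_START)
			fprintf(stderr, "thread %p: stack %p, %zu bytes\n",
					(void *)thread, addr, size);
		if (prev_hook)
			prev_hook(event, thread, addr, size);
	}

	static void __attribute__((constructor))
	install_hook(void)
	{
		prev_hook = pthread_introspection_hook_install(my_hook);
	}
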
index 0e2ecb73b9a4baf217a471803cd9ef93f6e27c9a..f5fdff6b7e126f635b7204b0779a15be08c320c3 100644 (file)
@@ -171,6 +171,12 @@ __BEGIN_DECLS
 #define PTHREAD_MUTEX_RECURSIVE                2
 #define PTHREAD_MUTEX_DEFAULT          PTHREAD_MUTEX_NORMAL
 
+/*
+ * Mutex policy attributes
+ */
+#define PTHREAD_MUTEX_POLICY_FAIRSHARE_NP   1
+#define PTHREAD_MUTEX_POLICY_FIRSTFIT_NP    3
+
 /*
  * RWLock variables
  */
@@ -405,6 +411,10 @@ __API_AVAILABLE(macos(10.4), ios(2.0))
 int pthread_mutexattr_gettype(const pthread_mutexattr_t * __restrict,
                int * __restrict);
 
+__API_AVAILABLE(macos(10.13.4), ios(11.3), watchos(4.3), tvos(11.3))
+int pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t * __restrict,
+               int * __restrict);
+
 __API_AVAILABLE(macos(10.4), ios(2.0))
 int pthread_mutexattr_init(pthread_mutexattr_t *);
 
@@ -420,6 +430,9 @@ int pthread_mutexattr_setpshared(pthread_mutexattr_t *, int);
 __API_AVAILABLE(macos(10.4), ios(2.0))
 int pthread_mutexattr_settype(pthread_mutexattr_t *, int);
 
+__API_AVAILABLE(macos(10.7), ios(5.0))
+int pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *, int);
+
 __SWIFT_UNAVAILABLE_MSG("Use lazily initialized globals instead")
 __API_AVAILABLE(macos(10.4), ios(2.0))
 int pthread_once(pthread_once_t *, void (* _Nonnull)(void));
index a0ba75430641bb3f5e143aa7470a0e496966640d..91fb6419fa42e6734f1aaf13b13fd4f0ce3f6c21 100644 (file)
@@ -63,19 +63,13 @@ __BEGIN_DECLS
 #if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)) || defined(_DARWIN_C_SOURCE)
 /* firstfit */
 #define PTHREAD_FIRSTFIT_MUTEX_INITIALIZER {_PTHREAD_FIRSTFIT_MUTEX_SIG_init, {0}}
+
 /*
  * Mutex attributes
  */
-#define _PTHREAD_MUTEX_POLICY_NONE             0
-#define _PTHREAD_MUTEX_POLICY_FAIRSHARE                1
-#define _PTHREAD_MUTEX_POLICY_FIRSTFIT         2
-
-/* manipulate the mutex policy attributes */
-__API_AVAILABLE(macos(10.7), ios(5.0))
-int pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *, int );
-
-__API_AVAILABLE(macos(10.13.4), ios(11.3))
-int pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t *, int * );
+#define _PTHREAD_MUTEX_POLICY_NONE                     PTHREAD_MUTEX_POLICY_NONE
+#define _PTHREAD_MUTEX_POLICY_FAIRSHARE                PTHREAD_MUTEX_POLICY_FAIRSHARE_NP
+#define _PTHREAD_MUTEX_POLICY_FIRSTFIT         PTHREAD_MUTEX_POLICY_FIRSTFIT_NP
 
 #endif /* (!_POSIX_C_SOURCE && !_XOPEN_SOURCE) || _DARWIN_C_SOURCE */
 
diff --git a/pthread/stack_np.h b/pthread/stack_np.h
new file mode 100644 (file)
index 0000000..9b5f513
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_START@
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_END@
+ */
+
+#ifndef __PTHREAD_STACK_NP__
+#define __PTHREAD_STACK_NP__
+
+#include <Availability.h>
+#include <sys/cdefs.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <os/base.h>
+
+OS_ASSUME_NONNULL_BEGIN
+
+/*! @header
+ * Low-level API to introspect thread stacks.
+ */
+
+__BEGIN_DECLS
+
+/*!
+ * @function pthread_stack_frame_decode_np
+ *
+ * @abstract
+ * Decodes the return address and the next stack frame address
+ * from the given stack frame address.
+ *
+ * @discussion
+ * Validation of the frame address is not performed by this function.
+ * The caller is responsible for making sure the frame address is valid,
+ * for example using pthread_get_stackaddr_np() and pthread_get_stacksize_np().
+ *
+ * @param frame_addr
+ * A valid stack frame address such as __builtin_frame_address(0) or the return
+ * value of a previous call to pthread_stack_frame_decode_np().
+ *
+ * @param return_addr
+ * An optional out parameter that will be filled with the return address stored
+ * at the specified stack frame.
+ *
+ * @returns
+ * This returns the next frame address stored at the specified stack frame.
+ */
+__OSX_AVAILABLE(10.14) __IOS_AVAILABLE(12.0)
+__TVOS_AVAILABLE(12.0) __WATCHOS_AVAILABLE(5.0)
+uintptr_t
+pthread_stack_frame_decode_np(uintptr_t frame_addr,
+               uintptr_t *_Nullable return_addr);
+
+__END_DECLS
+
+OS_ASSUME_NONNULL_END
+
+#endif // __PTHREAD_STACK_NP__
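
A sketch of the intended use of this new header: seed the walk with the
caller's own frame pointer and iterate, with the caller responsible for
bounding the walk (for example against pthread_get_stackaddr_np() and
pthread_get_stacksize_np(), as the discussion above notes) and for compiling
with frame pointers enabled:

	#include <pthread.h>
	#include <pthread/stack_np.h>
	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uintptr_t frame = (uintptr_t)__builtin_frame_address(0);
		uintptr_t ret_addr = 0;
		int depth;

		/* Decode a few frames; each call returns the next frame address
		 * and stores the return address found at the current one. */
		for (depth = 0; depth < 4 && frame != 0; depth++) {
			frame = pthread_stack_frame_decode_np(frame, &ret_addr);
			printf("frame %d: return address %p\n", depth,
					(void *)ret_addr);
		}
		return 0;
	}
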
index 9f2e127f37789a1e3c1e48ca3529bed25802476c..c9c16c7ae66d9e1733e1e5b1ea7e4095a2bc2c16 100644 (file)
@@ -70,6 +70,8 @@ typedef struct _pthread_attr_t pthread_attr_t;
 #include <mach/mach.h>
 #include <mach/mach_error.h>
 #include <sys/queue.h>
+#include <pthread/bsdthread_private.h>
+#include <pthread/workqueue_syscalls.h>
 
 #define __OS_EXPOSE_INTERNALS__ 1
 #include <os/internal/internal_shared.h>
@@ -125,19 +127,24 @@ typedef os_unfair_lock _pthread_lock;
 #define _PTHREAD_UNLOCK(lock) os_unfair_lock_unlock_inline(&(lock))
 #define _PTHREAD_UNLOCK_FROM_MACH_THREAD(lock) os_unfair_lock_unlock_inline_no_tsd_4libpthread(&(lock))
 
+#define _PTHREAD_POLICY_IS_FIXEDPRI(x) ((x) == SCHED_RR || (x) == SCHED_FIFO)
+
+extern int __is_threaded;
+extern int __unix_conforming;
+
 // List of all pthreads in the process.
 TAILQ_HEAD(__pthread_list, _pthread);
-extern struct __pthread_list __pthread_head;
+PTHREAD_NOEXPORT extern struct __pthread_list __pthread_head;
 
 // Lock protects access to above list.
-extern _pthread_lock _pthread_list_lock;
+PTHREAD_NOEXPORT extern _pthread_lock _pthread_list_lock;
 
-extern int __is_threaded;
+PTHREAD_NOEXPORT extern uint32_t _main_qos;
 
 #if PTHREAD_DEBUG_LOG
 #include <mach/mach_time.h>
-extern int _pthread_debuglog;
-extern uint64_t _pthread_debugstart;
+PTHREAD_NOEXPORT extern int _pthread_debuglog;
+PTHREAD_NOEXPORT extern uint64_t _pthread_debugstart;
 #endif
 
 /*
@@ -153,6 +160,8 @@ extern uint64_t _pthread_debugstart;
 #define _INTERNAL_POSIX_THREAD_KEYS_END 768
 #endif
 
+#define PTHREAD_T_OFFSET 0
+
 #define MAXTHREADNAMESIZE      64
 #define _PTHREAD_T
 typedef struct _pthread {
@@ -165,52 +174,56 @@ typedef struct _pthread {
        //
        // SPI - These fields are private.
        //
-       // these fields are globally protected by _pthread_list_lock:
-       uint32_t childrun:1,
-                       parentcheck:1,
-                       childexit:1,
-                       pad3:29;
-
-       _pthread_lock lock; // protect access to everything below
-       uint32_t detached:8,
-                       inherit:8,
-                       policy:8,
-                       kernalloc:1,
-                       schedset:1,
-                       wqthread:1,
-                       wqkillset:1,
-                       pad:4;
-
-#if defined(__LP64__)
-       uint32_t pad0;
-#endif
-
-       void *(*fun)(void*);    // thread start routine
-       void *arg;              // thread start routine argument
-       void *exit_value;       // thread exit value storage
-
-       semaphore_t joiner_notify;      // pthread_join notification
-
-       int max_tsd_key;
-       int cancel_state;       // whether the thread can be cancelled
-       int cancel_error;
 
-       int err_no;             // thread-local errno
+       //
+       // Fields protected by _pthread_list_lock
+       //
 
 
+       TAILQ_ENTRY(_pthread) tl_plist; // global thread list [aligned]
+       struct pthread_join_context_s *tl_join_ctx;
+       void *tl_exit_value;
+       uint32_t tl_policy:8,
+                       tl_joinable:1,
+                       tl_joiner_cleans_up:1,
+                       tl_has_custom_stack:1,
+                       __tl_pad:21;
+       // MACH_PORT_NULL if no joiner
+       // tsd[_PTHREAD_TSD_SLOT_MACH_THREAD_SELF] when has a joiner
+       // MACH_PORT_DEAD if the thread exited
+       uint32_t tl_exit_gate;
+       struct sched_param tl_param;
 
-       struct sched_param param;       // [aligned]
+       //
+       // Fields protected by pthread_t::lock
+       //
 
-       TAILQ_ENTRY(_pthread) plist;    // global thread list [aligned]
+       _pthread_lock lock;
+       uint16_t max_tsd_key;
+       uint16_t inherit:8,
+                       kernalloc:1,
+                       schedset:1,
+                       wqthread:1,
+                       wqkillset:1,
+                       wqoutsideqos:1,
+                       __flags_pad:3;
 
        char pthread_name[MAXTHREADNAMESIZE];   // includes NUL [aligned]
 
 
-       size_t stacksize;       // size of stack (page multiple and >= PTHREAD_STACK_MIN)
-
-       void* freeaddr;         // stack/thread allocation base address
-       size_t freesize;        // stack/thread allocation size
-       size_t guardsize;       // guard page size in bytes
+       void *(*fun)(void *);   // thread start routine
+       void *wq_kqid_ptr;              // wqthreads (workloop)
+       void *arg;                              // thread start routine argument
+       int   wq_nevents;               // wqthreads (workloop / kevent)
+       uint16_t wq_retop;              // wqthreads
+       uint8_t cancel_state;   // whether the thread can be canceled [atomic]
+       uint8_t canceled;               // 4597450 set if conformant cancelation happened
+       errno_t cancel_error;
+       errno_t err_no;                 // thread-local errno
+
+       void *stackaddr;                // base of the stack (page aligned)
+       void *stackbottom;              // stackaddr - stacksize
+       void *freeaddr;                 // stack/thread allocation base address
+       size_t freesize;                // stack/thread allocation size
+       size_t guardsize;               // guard page size in bytes
 
        // tsd-base relative accessed elements
        __attribute__((aligned(8)))
@@ -228,39 +241,39 @@ typedef struct _pthread {
        void *tsd[_EXTERNAL_POSIX_THREAD_KEYS_MAX + _INTERNAL_POSIX_THREAD_KEYS_MAX];
 } *pthread_t;
 
-
+#define _PTHREAD_ATTR_REFILLMS_MAX ((2<<24) - 1)
 struct _pthread_attr_t {
-       long sig;
-       _pthread_lock lock;
-       uint32_t detached:8,
+       long   sig;
+       size_t guardsize; // size in bytes of stack overflow guard area
+       void  *stackaddr; // stack base; vm_page_size aligned
+       size_t stacksize; // stack size; multiple of vm_page_size and >= PTHREAD_STACK_MIN
+       union {
+               struct sched_param param; // [aligned]
+               unsigned long qosclass; // pthread_priority_t
+       };
+       uint32_t
+               detached:8,
                inherit:8,
                policy:8,
-               fastpath:1,
                schedset:1,
                qosset:1,
-               unused:5;
-       struct sched_param param; // [aligned]
-       void *stackaddr; // stack base; vm_page_size aligned
-       size_t stacksize; // stack size; multiple of vm_page_size and >= PTHREAD_STACK_MIN
-       size_t guardsize; // size in bytes of stack overflow guard area
-       unsigned long qosclass;
+               policyset:1,
+               cpupercentset:1,
+               defaultguardpage:1,
+               unused:3;
+       uint32_t
+               cpupercent:8,
+               refillms:24;
 #if defined(__LP64__)
 #if defined(__LP64__)
-       uint32_t _reserved[2];
+       uint32_t _reserved[4];
 #else
 #else
-       uint32_t _reserved[1];
+       uint32_t _reserved[2];
 #endif
 };
 
 /*
  * Mutex attributes
  */
 #endif
 };
 
 /*
  * Mutex attributes
  */
-#define _PTHREAD_MUTEX_POLICY_NONE             0
-#define _PTHREAD_MUTEX_POLICY_FAIRSHARE                1
-#define _PTHREAD_MUTEX_POLICY_FIRSTFIT         2
-#define _PTHREAD_MUTEX_POLICY_REALTIME         3
-#define _PTHREAD_MUTEX_POLICY_ADAPTIVE         4
-#define _PTHREAD_MUTEX_POLICY_PRIPROTECT       5
-#define _PTHREAD_MUTEX_POLICY_PRIINHERIT       6
 
 #define _PTHREAD_MUTEXATTR_T
 typedef struct {
 
@@ -269,7 +282,7 @@ typedef struct {
        uint32_t protocol:2,
                type:2,
                pshared:2,
-               policy:3,
+               opt:3,
                unused:23;
 } pthread_mutexattr_t;
 
@@ -285,6 +298,21 @@ struct _pthread_mutex_options {
                unused:2,
                lock_count:16;
 };
+//
+#define _PTHREAD_MUTEX_POLICY_LAST             (PTHREAD_MUTEX_POLICY_FIRSTFIT_NP + 1)
+#define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE 1
+#define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT 2
+#define _PTHREAD_MTX_OPT_POLICY_DEFAULT _PTHREAD_MTX_OPT_POLICY_FIRSTFIT
+// The following _pthread_mutex_options definitions exist in synch_internal.h
+// such that the kernel extension can test for flags. They must be kept in
+// sync with the bit values in the struct above.
+// _PTHREAD_MTX_OPT_PSHARED 0x010
+// _PTHREAD_MTX_OPT_NOTIFY 0x1000
+// _PTHREAD_MTX_OPT_MUTEX 0x2000
+
+// The fixed mask is used to mask out portions of the mutex options that
+// change on a regular basis (notify, lock_count).
+#define _PTHREAD_MTX_OPT_FIXED_MASK    0x27ff
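
To make the mask concrete, here is a minimal sketch (illustrative only, not code from this commit): masking with _PTHREAD_MTX_OPT_FIXED_MASK strips the notify/lock_count churn so two snapshots of a mutex option word can be compared on the bits that are supposed to stay stable.

    // Hypothetical helper: true if two snapshots agree on the stable bits.
    static inline bool
    mutex_options_stable_equal(uint32_t a, uint32_t b)
    {
            return (a & _PTHREAD_MTX_OPT_FIXED_MASK) ==
                            (b & _PTHREAD_MTX_OPT_FIXED_MASK);
    }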
 
 typedef struct {
        long sig;
@@ -429,12 +457,6 @@ _pthread_selfid_direct(void)
 #define _PTHREAD_KERN_MUTEX_SIG                0x34567812  /*  */
 #define _PTHREAD_KERN_RWLOCK_SIG       0x56781234  /*  */
 
-#define _PTHREAD_CREATE_PARENT         4
-#define _PTHREAD_EXITED                        8
-// 4597450: begin
-#define _PTHREAD_WASCANCEL             0x10
-// 4597450: end
-
 #if defined(DEBUG)
 #define _PTHREAD_MUTEX_OWNER_SELF      pthread_self()
 #else
@@ -454,11 +476,6 @@ extern boolean_t swtch_pri(int);
 /* Prototypes. */
 
 /* Internal globals. */
-PTHREAD_NOEXPORT extern int __pthread_supported_features;
-
-/* Functions defined in machine-dependent files. */
-PTHREAD_NOEXPORT void _pthread_setup(pthread_t th, void (*f)(pthread_t), void *sp, int suspended, int needresume);
-
 PTHREAD_NOEXPORT void _pthread_tsd_cleanup(pthread_t self);
 
 PTHREAD_NOEXPORT int _pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t * flagp, uint32_t ** pmtxp, uint32_t * mgenp, uint32_t * ugenp);
@@ -468,8 +485,8 @@ PTHREAD_NOEXPORT void* malloc(size_t);
 PTHREAD_NOEXPORT void free(void*);
 
 /* syscall interfaces */
-extern uint32_t __psynch_mutexwait(pthread_mutex_t * mutex,  uint32_t mgen, uint32_t  ugen, uint64_t tid, uint32_t flags);
-extern uint32_t __psynch_mutexdrop(pthread_mutex_t * mutex,  uint32_t mgen, uint32_t  ugen, uint64_t tid, uint32_t flags);
+extern uint32_t __psynch_mutexwait(_pthread_mutex * mutex,  uint32_t mgen, uint32_t  ugen, uint64_t tid, uint32_t flags);
+extern uint32_t __psynch_mutexdrop(_pthread_mutex * mutex,  uint32_t mgen, uint32_t  ugen, uint64_t tid, uint32_t flags);
 
 extern uint32_t __psynch_cvbroad(pthread_cond_t * cv, uint64_t cvlsgen, uint64_t cvudgen, uint32_t flags, pthread_mutex_t * mutex,  uint64_t mugen, uint64_t tid);
 extern uint32_t __psynch_cvsignal(pthread_cond_t * cv, uint64_t cvlsgen, uint32_t cvugen, int thread_port, pthread_mutex_t * mutex,  uint64_t mugen, uint64_t tid, uint32_t flags);
@@ -489,7 +506,9 @@ PTHREAD_EXTERN
 int
 __proc_info(int callnum, int pid, int flavor, uint64_t arg, void * buffer, int buffersize);
 
-PTHREAD_NOEXPORT int _pthread_join_cleanup(pthread_t thread, void ** value_ptr, int conforming);
+PTHREAD_NOEXPORT
+void
+_pthread_deallocate(pthread_t t, bool from_mach_thread);
 
 PTHREAD_NORETURN PTHREAD_NOEXPORT
 void
@@ -499,6 +518,10 @@ PTHREAD_NORETURN PTHREAD_NOEXPORT
 void
 __pthread_abort_reason(const char *fmt, ...) __printflike(1,2);
 
+PTHREAD_NOEXPORT
+thread_qos_t
+_pthread_qos_class_to_thread_qos(qos_class_t qos);
+
 PTHREAD_NOEXPORT
 void
 _pthread_set_main_qos(pthread_priority_t qos);
@@ -515,7 +538,7 @@ PTHREAD_EXPORT
 void
 _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
 
-PTHREAD_EXPORT
+PTHREAD_NORETURN PTHREAD_EXPORT
 void
 _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int flags, int nkevents);
 
@@ -531,9 +554,13 @@ PTHREAD_NOEXPORT_VARIANT
 void
 _pthread_clear_qos_tsd(mach_port_t thread_port);
 
+#define PTHREAD_CONFORM_DARWIN_LEGACY     0
+#define PTHREAD_CONFORM_UNIX03_NOCANCEL   1
+#define PTHREAD_CONFORM_UNIX03_CANCELABLE 2
+
 PTHREAD_NOEXPORT_VARIANT
 void
-_pthread_testcancel(pthread_t thread, int isconforming);
+_pthread_testcancel(int conforming);
 
 PTHREAD_EXPORT
 void
@@ -545,11 +572,11 @@ _pthread_markcancel_if_canceled(pthread_t thread, mach_port_t kport);
 
 PTHREAD_NOEXPORT
 void
-_pthread_setcancelstate_exit(pthread_t self, void *value_ptr, int conforming);
+_pthread_setcancelstate_exit(pthread_t self, void *value_ptr);
 
 PTHREAD_NOEXPORT
-void *
-_pthread_get_exit_value(pthread_t t, int conforming);
+semaphore_t
+_pthread_joiner_prepost_wake(pthread_t thread);
 
 PTHREAD_ALWAYS_INLINE
 static inline mach_port_t
@@ -647,60 +674,54 @@ _pthread_rwlock_check_signature_init(_pthread_rwlock *rwlock)
        return (rwlock->sig == _PTHREAD_RWLOCK_SIG_init);
 }
 
-/* ALWAYS called with list lock and return with list lock */
+/*
+ * ALWAYS called without list lock and return with list lock held on success
+ *
+ * This weird calling convention exists because this function will sometimes
+ * drop the lock, and it's best that callers don't have to remember this.
+ */
 PTHREAD_ALWAYS_INLINE
 static inline bool
-_pthread_is_valid_locked(pthread_t thread)
+_pthread_validate_thread_and_list_lock(pthread_t thread)
 {
        pthread_t p;
+       if (thread == NULL) return false;
 loop:
-       TAILQ_FOREACH(p, &__pthread_head, plist) {
-               if (p == thread) {
-                       int state = os_atomic_load(&p->cancel_state, relaxed);
-                       if (state & _PTHREAD_CANCEL_INITIALIZED) {
-                               return true;
+       _PTHREAD_LOCK(_pthread_list_lock);
+       TAILQ_FOREACH(p, &__pthread_head, tl_plist) {
+               if (p != thread) continue;
+               int state = os_atomic_load(&p->cancel_state, relaxed);
+               if (os_likely(state & _PTHREAD_CANCEL_INITIALIZED)) {
+                       if (os_unlikely(p->sig != _PTHREAD_SIG)) {
+                               PTHREAD_CLIENT_CRASH(0, "pthread_t was corrupted");
                        }
-                       _PTHREAD_UNLOCK(_pthread_list_lock);
-                       thread_switch(_pthread_kernel_thread(p),
-                                       SWITCH_OPTION_OSLOCK_DEPRESS, 1);
-                       _PTHREAD_LOCK(_pthread_list_lock);
-                       goto loop;
+                       return true;
                }
+               _PTHREAD_UNLOCK(_pthread_list_lock);
+               thread_switch(_pthread_kernel_thread(p),
+                                         SWITCH_OPTION_OSLOCK_DEPRESS, 1);
+               goto loop;
        }
+       _PTHREAD_UNLOCK(_pthread_list_lock);
 
        return false;
 }
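
A hypothetical caller (a sketch only; _example_get_kport is not part of this commit) makes the contract concrete: the list lock must be dropped by the caller on success, and is already released on the failure path.

    static bool
    _example_get_kport(pthread_t thread, mach_port_t *portp)
    {
            if (!_pthread_validate_thread_and_list_lock(thread)) {
                    return false; // lock is not held on this path
            }
            *portp = _pthread_kernel_thread(thread);
            _PTHREAD_UNLOCK(_pthread_list_lock); // success path must unlock
            return true;
    }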
 
-#define PTHREAD_IS_VALID_LOCK_THREAD 0x1
-
 PTHREAD_ALWAYS_INLINE
 static inline bool
-_pthread_is_valid(pthread_t thread, int flags, mach_port_t *portp)
+_pthread_is_valid(pthread_t thread, mach_port_t *portp)
 {
        mach_port_t kport = MACH_PORT_NULL;
        bool valid;
 
-       if (thread == NULL) {
-               return false;
-       }
-
        if (thread == pthread_self()) {
                valid = true;
                kport = _pthread_kernel_thread(thread);
-               if (flags & PTHREAD_IS_VALID_LOCK_THREAD) {
-                       _PTHREAD_LOCK(thread->lock);
-               }
+       } else if (!_pthread_validate_thread_and_list_lock(thread)) {
+               valid = false;
        } else {
-               _PTHREAD_LOCK(_pthread_list_lock);
-               if (_pthread_is_valid_locked(thread)) {
-                       kport = _pthread_kernel_thread(thread);
-                       valid = true;
-                       if (flags & PTHREAD_IS_VALID_LOCK_THREAD) {
-                               _PTHREAD_LOCK(thread->lock);
-                       }
-               } else {
-                       valid = false;
-               }
+               kport = _pthread_kernel_thread(thread);
+               valid = true;
                _PTHREAD_UNLOCK(_pthread_list_lock);
        }
 
diff --git a/src/offsets.h b/src/offsets.h
new file mode 100644 (file)
index 0000000..0e20385
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef _POSIX_PTHREAD_OFFSETS_H
+#define _POSIX_PTHREAD_OFFSETS_H
+
+#ifndef __ASSEMBLER__
+#define check_backward_offset(field, value) \
+               _Static_assert(offsetof(struct _pthread, tsd) + value == \
+                               offsetof(struct _pthread, field), #value " is correct")
+#define check_forward_offset(field, value) \
+               _Static_assert(offsetof(struct _pthread, field) == value, \
+                               #value " is correct")
+#else
+#define check_backward_offset(field, value)
+#define check_forward_offset(field, value)
+#endif // __ASSEMBLER__
+
+#if defined(__i386__)
+#define _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET   140
+#define _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET 144
+#elif __LP64__
+#define _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET   -48
+#define _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET -40
+#else
+#define _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET   -36
+#define _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET -32
+#endif
+
+#if defined(__i386__)
+check_forward_offset(stackaddr, _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET);
+check_forward_offset(stackbottom, _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET);
+#else
+check_backward_offset(stackaddr, _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET);
+check_backward_offset(stackbottom, _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET);
+#endif
+
+#endif /* _POSIX_PTHREAD_OFFSETS_H */
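
As a worked illustration (assuming the LP64 values above), the invocation check_backward_offset(stackaddr, -48) expands to a build-time assertion roughly equivalent to:

    _Static_assert(offsetof(struct _pthread, tsd) + (-48) ==
                    offsetof(struct _pthread, stackaddr), "-48 is correct");

That is, stackaddr must sit exactly 48 bytes below the tsd array, which is what lets assembly that only knows the TSD base address reach it without knowing the full struct layout.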
index 8e63bd3a06e5321b0fd46f22cb5df93b62f7e3cb..c9c1b9bc4cceeb02dda250511b51086b09e31acf 100644 (file)
@@ -56,6 +56,8 @@
 #include "introspection_private.h"
 #include "qos_private.h"
 #include "tsd_private.h"
 #include "introspection_private.h"
 #include "qos_private.h"
 #include "tsd_private.h"
+#include "pthread/stack_np.h"
+#include "offsets.h" // included to validate the offsets at build time
 
 #include <stdlib.h>
 #include <errno.h>
 #include <unistd.h>
 #include <mach/mach_init.h>
 #include <mach/mach_vm.h>
+#include <mach/mach_sync_ipc.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/sysctl.h>
 #include <sys/queue.h>
+#include <sys/ulock.h>
 #include <sys/mman.h>
 #include <machine/vmparam.h>
 #define        __APPLE_API_PRIVATE
 #include <platform/compat.h>
 
 extern int __sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
-                    void *newp, size_t newlen);
+               void *newp, size_t newlen);
 extern void __exit(int) __attribute__((noreturn));
 extern int __pthread_kill(mach_port_t, int);
 
-extern struct _pthread _thread;
-extern int default_priority;
+extern void _pthread_joiner_wake(pthread_t thread);
 
+#if !VARIANT_DYLD
+PTHREAD_NOEXPORT extern struct _pthread *_main_thread_ptr;
+#define main_thread() (_main_thread_ptr)
+#endif // VARIANT_DYLD
 
-//
-// Global variables
-//
+// Default stack size is 512KB; independent of the main thread's stack size.
+#define DEFAULT_STACK_SIZE (size_t)(512 * 1024)
 
-static void (*exitf)(int) = __exit;
-PTHREAD_NOEXPORT void* (*_pthread_malloc)(size_t) = NULL;
-PTHREAD_NOEXPORT void (*_pthread_free)(void *) = NULL;
 
-#if PTHREAD_DEBUG_LOG
-#include <fcntl.h>
-int _pthread_debuglog;
-uint64_t _pthread_debugstart;
-#endif
-
-// This global should be used (carefully) by anyone needing to know if a
-// pthread (other than the main thread) has been created.
-int __is_threaded = 0;
+//
+// Global constants
+//
 
-int __unix_conforming = 0;
+/*
+ * The pthread may be offset into a page.  In that event, by contract
+ * with the kernel, the allocation will extend PTHREAD_SIZE from the
+ * start of the next page.  There's also one page worth of allocation
+ * below stacksize for the guard page. <rdar://problem/19941744>
+ */
+#define PTHREAD_SIZE ((size_t)mach_vm_round_page(sizeof(struct _pthread)))
+#define PTHREAD_ALLOCADDR(stackaddr, stacksize) ((stackaddr - stacksize) - vm_page_size)
+#define PTHREAD_ALLOCSIZE(stackaddr, stacksize) ((round_page((uintptr_t)stackaddr) + PTHREAD_SIZE) - (uintptr_t)PTHREAD_ALLOCADDR(stackaddr, stacksize))
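
A minimal sanity sketch (hypothetical; assumes <assert.h> and the common layout of a guard page below the stack with the page-rounded pthread_t above it):

    // The region is [guard page][stack][page-rounded struct _pthread]; the
    // macros recover its base address and total size from the stack top.
    static void
    _example_check_layout(void *stackaddr, size_t stacksize)
    {
            uintptr_t base = (uintptr_t)PTHREAD_ALLOCADDR(stackaddr, stacksize);
            size_t size = PTHREAD_ALLOCSIZE(stackaddr, stacksize);
            assert(base == (uintptr_t)stackaddr - stacksize - vm_page_size);
            assert(base + size == round_page((uintptr_t)stackaddr) + PTHREAD_SIZE);
    }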
 
-// _pthread_list_lock protects _pthread_count, access to the __pthread_head
-// list, and the parentcheck, childrun and childexit flags of the pthread
-// structure. Externally imported by pthread_cancelable.c.
-PTHREAD_NOEXPORT _pthread_lock _pthread_list_lock = _PTHREAD_LOCK_INITIALIZER;
-PTHREAD_NOEXPORT struct __pthread_list __pthread_head = TAILQ_HEAD_INITIALIZER(__pthread_head);
-static int _pthread_count = 1;
+static const pthread_attr_t _pthread_attr_default = {
+       .sig       = _PTHREAD_ATTR_SIG,
+       .stacksize = 0,
+       .detached  = PTHREAD_CREATE_JOINABLE,
+       .inherit   = _PTHREAD_DEFAULT_INHERITSCHED,
+       .policy    = _PTHREAD_DEFAULT_POLICY,
+       .defaultguardpage = true,
+       // compile time constant for _pthread_default_priority(0)
+       .qosclass  = (1U << (THREAD_QOS_LEGACY - 1 + _PTHREAD_PRIORITY_QOS_CLASS_SHIFT)) |
+                       ((uint8_t)-1 & _PTHREAD_PRIORITY_PRIORITY_MASK),
+};
 
 #if PTHREAD_LAYOUT_SPI
 
@@ -124,315 +134,154 @@ const struct pthread_layout_offsets_s pthread_layout_offsets = {
 #endif // PTHREAD_LAYOUT_SPI
 
 //
-// Static variables
+// Global exported variables
 //
 
-// Mach message notification that a thread needs to be recycled.
-typedef struct _pthread_reap_msg_t {
-       mach_msg_header_t header;
-       pthread_t thread;
-       mach_msg_trailer_t trailer;
-} pthread_reap_msg_t;
+// This global should be used (carefully) by anyone needing to know if a
+// pthread (other than the main thread) has been created.
+int __is_threaded = 0;
+int __unix_conforming = 0;
 
-/*
- * The pthread may be offset into a page.  In that event, by contract
- * with the kernel, the allocation will extend PTHREAD_SIZE from the
- * start of the next page.  There's also one page worth of allocation
- * below stacksize for the guard page. <rdar://problem/19941744>
- */
-#define PTHREAD_SIZE ((size_t)mach_vm_round_page(sizeof(struct _pthread)))
-#define PTHREAD_ALLOCADDR(stackaddr, stacksize) ((stackaddr - stacksize) - vm_page_size)
-#define PTHREAD_ALLOCSIZE(stackaddr, stacksize) ((round_page((uintptr_t)stackaddr) + PTHREAD_SIZE) - (uintptr_t)PTHREAD_ALLOCADDR(stackaddr, stacksize))
+//
+// Global internal variables
+//
 
-static pthread_attr_t _pthread_attr_default = { };
+// _pthread_list_lock protects _pthread_count, access to the __pthread_head
+// list. Externally imported by pthread_cancelable.c.
+struct __pthread_list __pthread_head = TAILQ_HEAD_INITIALIZER(__pthread_head);
+_pthread_lock _pthread_list_lock = _PTHREAD_LOCK_INITIALIZER;
+
+uint32_t _main_qos;
 
 
 // The main thread's pthread_t
 // The main thread's pthread_t
-PTHREAD_NOEXPORT struct _pthread _thread __attribute__((aligned(64))) = { };
+struct _pthread _main_thread __attribute__((aligned(64))) = { };
+#define main_thread() (&_main_thread)
+#else // VARIANT_DYLD
+struct _pthread *_main_thread_ptr;
+#endif // VARIANT_DYLD
 
-PTHREAD_NOEXPORT int default_priority;
-static int max_priority;
-static int min_priority;
+#if PTHREAD_DEBUG_LOG
+#include <fcntl.h>
+int _pthread_debuglog;
+uint64_t _pthread_debugstart;
+#endif
+
+//
+// Global static variables
+//
+static bool __workq_newapi;
+static uint8_t default_priority;
+#if !VARIANT_DYLD
+static uint8_t max_priority;
+static uint8_t min_priority;
+#endif // !VARIANT_DYLD
+static int _pthread_count = 1;
 static int pthread_concurrency;
+static uintptr_t _pthread_ptr_munge_token;
+
+static void (*exitf)(int) = __exit;
+#if !VARIANT_DYLD
+static void *(*_pthread_malloc)(size_t) = NULL;
+static void (*_pthread_free)(void *) = NULL;
+#endif // !VARIANT_DYLD
 
 // work queue support data
-static void (*__libdispatch_workerfunction)(pthread_priority_t) = NULL;
-static void (*__libdispatch_keventfunction)(void **events, int *nevents) = NULL;
-static void (*__libdispatch_workloopfunction)(uint64_t *workloop_id, void **events, int *nevents) = NULL;
+PTHREAD_NORETURN
+static void
+__pthread_invalid_keventfunction(void **events, int *nevents)
+{
+       PTHREAD_CLIENT_CRASH(0, "Invalid kqworkq setup");
+}
+
+PTHREAD_NORETURN
+static void
+__pthread_invalid_workloopfunction(uint64_t *workloop_id, void **events, int *nevents)
+{
+       PTHREAD_CLIENT_CRASH(0, "Invalid kqwl setup");
+}
+static pthread_workqueue_function2_t __libdispatch_workerfunction;
+static pthread_workqueue_function_kevent_t __libdispatch_keventfunction = &__pthread_invalid_keventfunction;
+static pthread_workqueue_function_workloop_t __libdispatch_workloopfunction = &__pthread_invalid_workloopfunction;
 static int __libdispatch_offset;
+static int __pthread_supported_features; // supported feature set
 
-// supported feature set
-int __pthread_supported_features;
-static bool __workq_newapi;
+#if defined(__i386__) || defined(__x86_64__)
+static mach_vm_address_t __pthread_stack_hint = 0xB0000000;
+#else
+#error no __pthread_stack_hint for this architecture
+#endif
 
 //
 // Function prototypes
 //
 
 // pthread primitives
-static int _pthread_allocate(pthread_t *thread, const pthread_attr_t *attrs, void **stack);
-static int _pthread_deallocate(pthread_t t);
-
-static void _pthread_terminate_invoke(pthread_t t);
-
-static inline void _pthread_struct_init(pthread_t t,
-       const pthread_attr_t *attrs,
-       void *stack,
-       size_t stacksize,
-       void *freeaddr,
-       size_t freesize);
+static inline void _pthread_struct_init(pthread_t t, const pthread_attr_t *attrs,
+               void *stack, size_t stacksize, void *freeaddr, size_t freesize);
 
+#if VARIANT_DYLD
+static void _pthread_set_self_dyld(void);
+#endif // VARIANT_DYLD
 static inline void _pthread_set_self_internal(pthread_t, bool needs_tsd_base_set);
 
 static void _pthread_dealloc_reply_port(pthread_t t);
 static void _pthread_dealloc_special_reply_port(pthread_t t);
 
-static inline void __pthread_add_thread(pthread_t t, const pthread_attr_t *attr, bool parent, bool from_mach_thread);
-static inline int __pthread_remove_thread(pthread_t t, bool child, bool *should_exit);
+static inline void __pthread_started_thread(pthread_t t);
 
 static void _pthread_exit(pthread_t self, void *value_ptr) __dead2;
 
-static inline void _pthread_introspection_thread_create(pthread_t t, bool destroy);
+static inline void _pthread_introspection_thread_create(pthread_t t);
 static inline void _pthread_introspection_thread_start(pthread_t t);
-static inline void _pthread_introspection_thread_terminate(pthread_t t, void *freeaddr, size_t freesize, bool destroy);
+static inline void _pthread_introspection_thread_terminate(pthread_t t);
 static inline void _pthread_introspection_thread_destroy(pthread_t t);
 
 extern void _pthread_set_self(pthread_t);
 extern void start_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unused, int reuse); // trampoline into _pthread_wqthread
 extern void thread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags); // trampoline into _pthread_start
 
-/* Compatibility: previous pthread API used WORKQUEUE_OVERCOMMIT to request overcommit threads from
- * the kernel. This definition is kept here, in userspace only, to perform the compatibility shim
- * from old API requests to the new kext conventions.
- */
-#define WORKQUEUE_OVERCOMMIT 0x10000
-
 /*
  * Flags field passed to bsdthread_create and back in pthread_start
-31  <---------------------------------> 0
-_________________________________________
-| flags(8) | policy(8) | importance(16) |
------------------------------------------
-*/
-
-#define PTHREAD_START_CUSTOM           0x01000000
+ * 31  <---------------------------------> 0
+ * _________________________________________
+ * | flags(8) | policy(8) | importance(16) |
+ * -----------------------------------------
+ */
+#define PTHREAD_START_CUSTOM           0x01000000 // <rdar://problem/34501401>
 #define PTHREAD_START_SETSCHED         0x02000000
-#define PTHREAD_START_DETACHED         0x04000000
+// was PTHREAD_START_DETACHED          0x04000000
 #define PTHREAD_START_QOSCLASS         0x08000000
 #define PTHREAD_START_TSD_BASE_SET     0x10000000
+#define PTHREAD_START_SUSPENDED                0x20000000
 #define PTHREAD_START_QOSCLASS_MASK 0x00ffffff
 #define PTHREAD_START_POLICY_BITSHIFT 16
 #define PTHREAD_START_POLICY_MASK 0xff
 #define PTHREAD_START_IMPORTANCE_MASK 0xffff
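
A packing sketch (illustrative only; the masks above define the layout, and the unpacking on the receiving side mirrors the shifts):

    // flags occupy the top byte, policy bits 16-23, importance the low 16.
    static unsigned int
    _example_pack_start_flags(unsigned int flags, int policy, int importance)
    {
            return flags
                    | ((policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT)
                    | (importance & PTHREAD_START_IMPORTANCE_MASK);
    }
    // Unpacking reverses the operations:
    //   policy     = (pflags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
    //   importance = pflags & PTHREAD_START_IMPORTANCE_MASK;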
 
-static int pthread_setschedparam_internal(pthread_t, mach_port_t, int, const struct sched_param *);
+#if (!defined(__OPEN_SOURCE__) && TARGET_OS_OSX) || OS_VARIANT_RESOLVED // 40703288
+static int pthread_setschedparam_internal(pthread_t, mach_port_t, int,
+               const struct sched_param *);
+#endif
+
 extern pthread_t __bsdthread_create(void *(*func)(void *), void * func_arg, void * stack, pthread_t  thread, unsigned int flags);
 extern int __bsdthread_register(void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), void (*)(pthread_t, mach_port_t, void *, void *, int), int,void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), int32_t *,__uint64_t);
 extern int __bsdthread_terminate(void * freeaddr, size_t freesize, mach_port_t kport, mach_port_t joinsem);
 extern __uint64_t __thread_selfid( void );
 
-extern int __workq_open(void);
-extern int __workq_kernreturn(int, void *, int, int);
-
-#if defined(__i386__) || defined(__x86_64__)
-static const mach_vm_address_t PTHREAD_STACK_HINT = 0xB0000000;
+#if __LP64__
+_Static_assert(offsetof(struct _pthread, tsd) == 224, "TSD LP64 offset");
 #else
-#error no PTHREAD_STACK_HINT for this architecture
+_Static_assert(offsetof(struct _pthread, tsd) == 176, "TSD ILP32 offset");
 #endif
-
-// Check that offsets of _PTHREAD_STRUCT_DIRECT_*_OFFSET values haven't changed
 _Static_assert(offsetof(struct _pthread, tsd) + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET
                == offsetof(struct _pthread, thread_id),
                "_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET is correct");
 
-// Allocate a thread structure, stack and guard page.
-//
-// The thread structure may optionally be placed in the same allocation as the
-// stack, residing above the top of the stack. This cannot be done if a
-// custom stack address is provided.
-//
-// Similarly the guard page cannot be allocated if a custom stack address is
-// provided.
-//
-// The allocated thread structure is initialized with values that indicate how
-// it should be freed.
-
-static int
-_pthread_allocate(pthread_t *thread, const pthread_attr_t *attrs, void **stack)
-{
-       int res;
-       kern_return_t kr;
-       pthread_t t = NULL;
-       mach_vm_address_t allocaddr = PTHREAD_STACK_HINT;
-       size_t allocsize = 0;
-       size_t guardsize = 0;
-       size_t stacksize = 0;
-
-       PTHREAD_ASSERT(attrs->stacksize >= PTHREAD_STACK_MIN);
-
-       *thread = NULL;
-       *stack = NULL;
-
-       // Allocate a pthread structure if necessary
-
-       if (attrs->stackaddr != NULL) {
-               PTHREAD_ASSERT(((uintptr_t)attrs->stackaddr % vm_page_size) == 0);
-               *stack = attrs->stackaddr;
-               allocsize = PTHREAD_SIZE;
-       } else {
-               guardsize = attrs->guardsize;
-               stacksize = attrs->stacksize;
-               allocsize = stacksize + guardsize + PTHREAD_SIZE;
-       }
-
-       kr = mach_vm_map(mach_task_self(),
-                        &allocaddr,
-                        allocsize,
-                        vm_page_size - 1,
-                        VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE,
-                        MEMORY_OBJECT_NULL,
-                        0,
-                        FALSE,
-                        VM_PROT_DEFAULT,
-                        VM_PROT_ALL,
-                        VM_INHERIT_DEFAULT);
-
-       if (kr != KERN_SUCCESS) {
-               kr = mach_vm_allocate(mach_task_self(),
-                                &allocaddr,
-                                allocsize,
-                                VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
-       }
-
-       if (kr == KERN_SUCCESS) {
-               // The stack grows down.
-               // Set the guard page at the lowest address of the
-               // newly allocated stack. Return the highest address
-               // of the stack.
-               if (guardsize) {
-                       (void)mach_vm_protect(mach_task_self(), allocaddr, guardsize, FALSE, VM_PROT_NONE);
-               }
-
-               // Thread structure resides at the top of the stack.
-               t = (void *)(allocaddr + stacksize + guardsize);
-               if (stacksize) {
-                       // Returns the top of the stack.
-                       *stack = t;
-               }
-       }
-
-       if (t != NULL) {
-               _pthread_struct_init(t, attrs,
-                                    *stack, attrs->stacksize,
-                                    allocaddr, allocsize);
-               *thread = t;
-               res = 0;
-       } else {
-               res = EAGAIN;
-       }
-        return res;
-}
-
-static int
-_pthread_deallocate(pthread_t t)
-{
-       // Don't free the main thread.
-       if (t != &_thread) {
-               kern_return_t ret;
-               ret = mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize);
-               PTHREAD_ASSERT(ret == KERN_SUCCESS);
-       }
-       return 0;
-}
-
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wreturn-stack-address"
-
-PTHREAD_NOINLINE
-static void*
-_pthread_current_stack_address(void)
-{
-       int a;
-       return &a;
-}
-
-#pragma clang diagnostic pop
-
-// Terminates the thread if called from the currently running thread.
-PTHREAD_NORETURN PTHREAD_NOINLINE PTHREAD_NOT_TAIL_CALLED
-static void
-_pthread_terminate(pthread_t t)
-{
-       PTHREAD_ASSERT(t == pthread_self());
-
-       uintptr_t freeaddr = (uintptr_t)t->freeaddr;
-       size_t freesize = t->freesize;
-
-       // the size of just the stack
-       size_t freesize_stack = t->freesize;
-
-       // We usually pass our structure+stack to bsdthread_terminate to free, but
-       // if we get told to keep the pthread_t structure around then we need to
-       // adjust the free size and addr in the pthread_t to just refer to the
-       // structure and not the stack.  If we do end up deallocating the
-       // structure, this is useless work since no one can read the result, but we
-       // can't do it after the call to pthread_remove_thread because it isn't
-       // safe to dereference t after that.
-       if ((void*)t > t->freeaddr && (void*)t < t->freeaddr + t->freesize){
-               // Check to ensure the pthread structure itself is part of the
-               // allocation described by freeaddr/freesize, in which case we split and
-               // only deallocate the area below the pthread structure.  In the event of a
-               // custom stack, the freeaddr/size will be the pthread structure itself, in
-               // which case we shouldn't free anything (the final else case).
-               freesize_stack = trunc_page((uintptr_t)t - (uintptr_t)freeaddr);
-
-               // describe just the remainder for deallocation when the pthread_t goes away
-               t->freeaddr += freesize_stack;
-               t->freesize -= freesize_stack;
-       } else if (t == &_thread){
-               freeaddr = t->stackaddr - pthread_get_stacksize_np(t);
-               uintptr_t stackborder = trunc_page((uintptr_t)_pthread_current_stack_address());
-               freesize_stack = stackborder - freeaddr;
-       } else {
-               freesize_stack = 0;
-       }
-
-       mach_port_t kport = _pthread_kernel_thread(t);
-       semaphore_t joinsem = t->joiner_notify;
-
-       _pthread_dealloc_special_reply_port(t);
-       _pthread_dealloc_reply_port(t);
+#pragma mark pthread attrs
 
 
-       // After the call to __pthread_remove_thread, it is not safe to
-       // dereference the pthread_t structure.
-
-       bool destroy, should_exit;
-       destroy = (__pthread_remove_thread(t, true, &should_exit) != EBUSY);
-
-       if (!destroy || t == &_thread) {
-               // Use the adjusted freesize of just the stack that we computed above.
-               freesize = freesize_stack;
-       }
-
-       // Check if there is nothing to free because the thread has a custom
-       // stack allocation and is joinable.
-       if (freesize == 0) {
-               freeaddr = 0;
-       }
-       _pthread_introspection_thread_terminate(t, freeaddr, freesize, destroy);
-       if (should_exit) {
-               exitf(0);
-       }
-
-       __bsdthread_terminate((void *)freeaddr, freesize, kport, joinsem);
-       PTHREAD_ABORT("thread %p didn't terminate", t);
-}
-
-PTHREAD_NORETURN
-static void
-_pthread_terminate_invoke(pthread_t t)
-{
-       _pthread_terminate(t);
-}
+_Static_assert(sizeof(struct _pthread_attr_t) == sizeof(__darwin_pthread_attr_t),
+               "internal pthread_attr_t == external pthread_attr_t");
 
 int
 pthread_attr_destroy(pthread_attr_t *attr)
@@ -467,12 +316,24 @@ pthread_attr_getinheritsched(const pthread_attr_t *attr, int *inheritsched)
        return ret;
 }
 
+static PTHREAD_ALWAYS_INLINE void
+_pthread_attr_get_schedparam(const pthread_attr_t *attr,
+               struct sched_param *param)
+{
+       if (attr->schedset) {
+               *param = attr->param;
+       } else {
+               param->sched_priority = default_priority;
+               param->quantum = 10; /* quantum isn't public yet */
+       }
+}
+
 int
 pthread_attr_getschedparam(const pthread_attr_t *attr, struct sched_param *param)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG) {
-               *param = attr->param;
+               _pthread_attr_get_schedparam(attr, param);
                ret = 0;
        }
        return ret;
@@ -489,24 +350,10 @@ pthread_attr_getschedpolicy(const pthread_attr_t *attr, int *policy)
        return ret;
 }
 
-// Default stack size is 512KB; independent of the main thread's stack size.
-static const size_t DEFAULT_STACK_SIZE = 512 * 1024;
-
 int
 pthread_attr_init(pthread_attr_t *attr)
 {
-       attr->stacksize = DEFAULT_STACK_SIZE;
-       attr->stackaddr = NULL;
-       attr->sig = _PTHREAD_ATTR_SIG;
-       attr->param.sched_priority = default_priority;
-       attr->param.quantum = 10; /* quantum isn't public yet */
-       attr->detached = PTHREAD_CREATE_JOINABLE;
-       attr->inherit = _PTHREAD_DEFAULT_INHERITSCHED;
-       attr->policy = _PTHREAD_DEFAULT_POLICY;
-       attr->fastpath = 1;
-       attr->schedset = 0;
-       attr->guardsize = vm_page_size;
-       attr->qosclass = _pthread_priority_make_newest(QOS_CLASS_DEFAULT, 0, 0);
+       *attr = _pthread_attr_default;
        return 0;
 }
 
@@ -515,8 +362,8 @@ pthread_attr_setdetachstate(pthread_attr_t *attr, int detachstate)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG &&
-           (detachstate == PTHREAD_CREATE_JOINABLE ||
-            detachstate == PTHREAD_CREATE_DETACHED)) {
+                       (detachstate == PTHREAD_CREATE_JOINABLE ||
+                       detachstate == PTHREAD_CREATE_DETACHED)) {
                attr->detached = detachstate;
                ret = 0;
        }
@@ -528,8 +375,8 @@ pthread_attr_setinheritsched(pthread_attr_t *attr, int inheritsched)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG &&
-           (inheritsched == PTHREAD_INHERIT_SCHED ||
-            inheritsched == PTHREAD_EXPLICIT_SCHED)) {
+                       (inheritsched == PTHREAD_INHERIT_SCHED ||
+                       inheritsched == PTHREAD_EXPLICIT_SCHED)) {
                attr->inherit = inheritsched;
                ret = 0;
        }
@@ -553,12 +400,14 @@ int
 pthread_attr_setschedpolicy(pthread_attr_t *attr, int policy)
 {
        int ret = EINVAL;
-       if (attr->sig == _PTHREAD_ATTR_SIG &&
-           (policy == SCHED_OTHER ||
-            policy == SCHED_RR ||
-            policy == SCHED_FIFO)) {
+       if (attr->sig == _PTHREAD_ATTR_SIG && (policy == SCHED_OTHER ||
+                       policy == SCHED_RR || policy == SCHED_FIFO)) {
+               if (!_PTHREAD_POLICY_IS_FIXEDPRI(policy)) {
+                       /* non-fixedpri policy should remove cpupercent */
+                       attr->cpupercentset = 0;
+               }
                attr->policy = policy;
-               attr->schedset = 1;
+               attr->policyset = 1;
                ret = 0;
        }
        return ret;
@@ -606,21 +455,27 @@ pthread_attr_setstackaddr(pthread_attr_t *attr, void *stackaddr)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG &&
-           ((uintptr_t)stackaddr % vm_page_size) == 0) {
+                       ((uintptr_t)stackaddr % vm_page_size) == 0) {
                attr->stackaddr = stackaddr;
-               attr->fastpath = 0;
+               attr->defaultguardpage = false;
                attr->guardsize = 0;
                ret = 0;
        }
        return ret;
 }
 
+static inline size_t
+_pthread_attr_stacksize(const pthread_attr_t *attr)
+{
+       return attr->stacksize ? attr->stacksize : DEFAULT_STACK_SIZE;
+}
+
 int
 pthread_attr_getstacksize(const pthread_attr_t *attr, size_t *stacksize)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG) {
-               *stacksize = attr->stacksize;
+               *stacksize = _pthread_attr_stacksize(attr);
                ret = 0;
        }
        return ret;
@@ -631,70 +486,356 @@ pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG &&
-           (stacksize % vm_page_size) == 0 &&
-           stacksize >= PTHREAD_STACK_MIN) {
+                       (stacksize % vm_page_size) == 0 &&
+                       stacksize >= PTHREAD_STACK_MIN) {
+               attr->stacksize = stacksize;
+               ret = 0;
+       }
+       return ret;
+}
+
+int
+pthread_attr_getstack(const pthread_attr_t *attr, void **stackaddr, size_t * stacksize)
+{
+       int ret = EINVAL;
+       if (attr->sig == _PTHREAD_ATTR_SIG) {
+               *stackaddr = (void *)((uintptr_t)attr->stackaddr - attr->stacksize);
+               *stacksize = _pthread_attr_stacksize(attr);
+               ret = 0;
+       }
+       return ret;
+}
+
+// Per SUSv3, the stackaddr is the base address, the lowest addressable byte
+// address. This is not the same as in pthread_attr_setstackaddr.
+int
+pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize)
+{
+       int ret = EINVAL;
+       if (attr->sig == _PTHREAD_ATTR_SIG &&
+                       ((uintptr_t)stackaddr % vm_page_size) == 0 &&
+                       (stacksize % vm_page_size) == 0 &&
+                       stacksize >= PTHREAD_STACK_MIN) {
+               attr->stackaddr = (void *)((uintptr_t)stackaddr + stacksize);
                attr->stacksize = stacksize;
                ret = 0;
        }
-       return ret;
+       return ret;
+}
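
Usage contrast (hypothetical values; valloc is used only to obtain a page-aligned region): pthread_attr_setstack() takes the lowest address of the stack region per SUSv3, while the legacy pthread_attr_setstackaddr() takes the high end, which is what this implementation stores internally.

    pthread_attr_t attr;
    pthread_attr_init(&attr);
    size_t sz = 512 * 1024;
    void *base = valloc(sz);                        // page-aligned low address
    pthread_attr_setstack(&attr, base, sz);         // SUSv3 form: base + size
    pthread_attr_setstackaddr(&attr, (char *)base + sz); // legacy: stack top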
+
+int
+pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize)
+{
+       int ret = EINVAL;
+       if (attr->sig == _PTHREAD_ATTR_SIG && (guardsize % vm_page_size) == 0) {
+               /* Guardsize of 0 is valid, means no guard */
+               attr->defaultguardpage = false;
+               attr->guardsize = guardsize;
+               ret = 0;
+       }
+       return ret;
+}
+
+static inline size_t
+_pthread_attr_guardsize(const pthread_attr_t *attr)
+{
+       return attr->defaultguardpage ? vm_page_size : attr->guardsize;
+}
+
+int
+pthread_attr_getguardsize(const pthread_attr_t *attr, size_t *guardsize)
+{
+       int ret = EINVAL;
+       if (attr->sig == _PTHREAD_ATTR_SIG) {
+               *guardsize = _pthread_attr_guardsize(attr);
+               ret = 0;
+       }
+       return ret;
+}
+
+int
+pthread_attr_setcpupercent_np(pthread_attr_t *attr, int percent,
+               unsigned long refillms)
+{
+       int ret = EINVAL;
+       if (attr->sig == _PTHREAD_ATTR_SIG && percent < UINT8_MAX &&
+                       refillms < _PTHREAD_ATTR_REFILLMS_MAX && attr->policyset &&
+                       _PTHREAD_POLICY_IS_FIXEDPRI(attr->policy)) {
+               attr->cpupercent = percent;
+               attr->refillms = (uint32_t)(refillms & 0x00ffffff);
+               attr->cpupercentset = 1;
+               ret = 0;
+       }
+       return ret;
+}
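
For example (a hedged sketch; per the checks above the call succeeds only after a fixed-priority scheduling policy has been set, and SCHED_RR is assumed here to qualify as fixed-priority):

    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_attr_setschedpolicy(&attr, SCHED_RR);  // sets policyset first
    // Cap the thread at 50% CPU over a 10ms refill window.
    int rc = pthread_attr_setcpupercent_np(&attr, 50, 10);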
+
+#pragma mark pthread lifetime
+
+// Allocate a thread structure, stack and guard page.
+//
+// The thread structure may optionally be placed in the same allocation as the
+// stack, residing above the top of the stack. This cannot be done if a
+// custom stack address is provided.
+//
+// Similarly the guard page cannot be allocated if a custom stack address is
+// provided.
+//
+// The allocated thread structure is initialized with values that indicate how
+// it should be freed.
+
+static pthread_t
+_pthread_allocate(const pthread_attr_t *attrs, void **stack)
+{
+       mach_vm_address_t allocaddr = __pthread_stack_hint;
+       size_t allocsize, guardsize, stacksize;
+       kern_return_t kr;
+       pthread_t t;
+
+       PTHREAD_ASSERT(attrs->stacksize == 0 ||
+                       attrs->stacksize >= PTHREAD_STACK_MIN);
+
+       // Allocate a pthread structure if necessary
+
+       if (attrs->stackaddr != NULL) {
+               PTHREAD_ASSERT(((uintptr_t)attrs->stackaddr % vm_page_size) == 0);
+               allocsize = PTHREAD_SIZE;
+               guardsize = 0;
+               // <rdar://problem/42588315> if the attrs struct specifies a custom
+               // stack address but not a custom size, using ->stacksize here instead
+               // of _pthread_attr_stacksize stores stacksize as zero, indicating
+               // that the stack size is unknown.
+               stacksize = attrs->stacksize;
+       } else {
+               guardsize = _pthread_attr_guardsize(attrs);
+               stacksize = _pthread_attr_stacksize(attrs) + PTHREAD_T_OFFSET;
+               allocsize = stacksize + guardsize + PTHREAD_SIZE;
+               allocsize = mach_vm_round_page(allocsize);
+       }
+
+       kr = mach_vm_map(mach_task_self(), &allocaddr, allocsize, vm_page_size - 1,
+                        VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, MEMORY_OBJECT_NULL,
+                        0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+
+       if (kr != KERN_SUCCESS) {
+               kr = mach_vm_allocate(mach_task_self(), &allocaddr, allocsize,
+                                VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
+       }
+       if (kr != KERN_SUCCESS) {
+               *stack  = NULL;
+               return NULL;
+       }
+
+       // The stack grows down.
+       // Set the guard page at the lowest address of the
+       // newly allocated stack. Return the highest address
+       // of the stack.
+       if (guardsize) {
+               (void)mach_vm_protect(mach_task_self(), allocaddr, guardsize,
+                               FALSE, VM_PROT_NONE);
+       }
+
+       // Thread structure resides at the top of the stack (when using a
+       // custom stack, allocsize == PTHREAD_SIZE, which places the pthread_t
+       // at allocaddr).
+       t = (pthread_t)(allocaddr + allocsize - PTHREAD_SIZE);
+       if (attrs->stackaddr) {
+               *stack = attrs->stackaddr;
+       } else {
+               *stack = t;
+       }
+
+       _pthread_struct_init(t, attrs, *stack, stacksize, allocaddr, allocsize);
+       return t;
+}
+
+PTHREAD_NOINLINE
+void
+_pthread_deallocate(pthread_t t, bool from_mach_thread)
+{
+       kern_return_t ret;
+
+       // Don't free the main thread.
+       if (t != main_thread()) {
+               if (!from_mach_thread) { // see __pthread_add_thread
+                       _pthread_introspection_thread_destroy(t);
+               }
+               ret = mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize);
+               PTHREAD_ASSERT(ret == KERN_SUCCESS);
+       }
 }
 
 }
 
-int
-pthread_attr_getstack(const pthread_attr_t *attr, void **stackaddr, size_t * stacksize)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreturn-stack-address"
+
+PTHREAD_NOINLINE
+static void*
+_pthread_current_stack_address(void)
 {
 {
-       int ret = EINVAL;
-       if (attr->sig == _PTHREAD_ATTR_SIG) {
-               *stackaddr = (void *)((uintptr_t)attr->stackaddr - attr->stacksize);
-               *stacksize = attr->stacksize;
-               ret = 0;
-       }
-       return ret;
+       int a;
+       return &a;
 }
 
-// Per SUSv3, the stackaddr is the base address, the lowest addressable byte
-// address. This is not the same as in pthread_attr_setstackaddr.
-int
-pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize)
+#pragma clang diagnostic pop
+
+void
+_pthread_joiner_wake(pthread_t thread)
 {
-       int ret = EINVAL;
-       if (attr->sig == _PTHREAD_ATTR_SIG &&
-           ((uintptr_t)stackaddr % vm_page_size) == 0 &&
-           (stacksize % vm_page_size) == 0 &&
-           stacksize >= PTHREAD_STACK_MIN) {
-               attr->stackaddr = (void *)((uintptr_t)stackaddr + stacksize);
-               attr->stacksize = stacksize;
-               attr->fastpath = 0;
-               ret = 0;
+       uint32_t *exit_gate = &thread->tl_exit_gate;
+
+       for (;;) {
+               int ret = __ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, exit_gate, 0);
+               if (ret == 0 || ret == -ENOENT) {
+                       return;
+               }
+               if (ret != -EINTR) {
+                       PTHREAD_INTERNAL_CRASH(-ret, "pthread_join() wake failure");
+               }
        }
-       return ret;
 }
 
-int
-pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize)
+// Terminates the thread if called from the currently running thread.
+PTHREAD_NORETURN PTHREAD_NOINLINE PTHREAD_NOT_TAIL_CALLED
+static void
+_pthread_terminate(pthread_t t, void *exit_value)
 {
-       int ret = EINVAL;
-       if (attr->sig == _PTHREAD_ATTR_SIG) {
-               /* Guardsize of 0 is valid, it means no guard */
-               if ((guardsize % vm_page_size) == 0) {
-                       attr->guardsize = guardsize;
-                       attr->fastpath = 0;
-                       ret = 0;
+       PTHREAD_ASSERT(t == pthread_self());
+
+       _pthread_introspection_thread_terminate(t);
+
+       uintptr_t freeaddr = (uintptr_t)t->freeaddr;
+       size_t freesize = t->freesize;
+       bool should_exit;
+
+       // the size of just the stack
+       size_t freesize_stack = t->freesize;
+
+       // We usually pass our structure+stack to bsdthread_terminate to free, but
+       // if we get told to keep the pthread_t structure around then we need to
+       // adjust the free size and addr in the pthread_t to just refer to the
+       // structure and not the stack.  If we do end up deallocating the
+       // structure, this is useless work since no one can read the result, but we
+       // can't do it after the call to pthread_remove_thread because it isn't
+       // safe to dereference t after that.
+       if ((void*)t > t->freeaddr && (void*)t < t->freeaddr + t->freesize){
+               // Check to ensure the pthread structure itself is part of the
+               // allocation described by freeaddr/freesize, in which case we split and
+               // only deallocate the area below the pthread structure.  In the event of a
+               // custom stack, the freeaddr/size will be the pthread structure itself, in
+               // which case we shouldn't free anything (the final else case).
+               freesize_stack = trunc_page((uintptr_t)t - (uintptr_t)freeaddr);
+
+               // describe just the remainder for deallocation when the pthread_t goes away
+               t->freeaddr += freesize_stack;
+               t->freesize -= freesize_stack;
+       } else if (t == main_thread()) {
+               freeaddr = t->stackaddr - pthread_get_stacksize_np(t);
+               uintptr_t stackborder = trunc_page((uintptr_t)_pthread_current_stack_address());
+               freesize_stack = stackborder - freeaddr;
+       } else {
+               freesize_stack = 0;
+       }
+
+       mach_port_t kport = _pthread_kernel_thread(t);
+       bool keep_thread_struct = false, needs_wake = false;
+       semaphore_t custom_stack_sema = MACH_PORT_NULL;
+
+       _pthread_dealloc_special_reply_port(t);
+       _pthread_dealloc_reply_port(t);
+
+       _PTHREAD_LOCK(_pthread_list_lock);
+
+       // This piece of code interacts with pthread_join. It will always:
+       // - set tl_exit_gate to MACH_PORT_DEAD (thread exited)
+       // - set tl_exit_value to the value passed to pthread_exit()
+       // - decrement _pthread_count, so that we can exit the process when all
+       //   threads exited even if not all of them were joined.
+       t->tl_exit_gate = MACH_PORT_DEAD;
+       t->tl_exit_value = exit_value;
+       should_exit = (--_pthread_count <= 0);
+
+       // If we see a joiner, we prepost that the join has to succeed,
+       // and the joiner is committed to finish (even if it was canceled)
+       if (t->tl_join_ctx) {
+               custom_stack_sema = _pthread_joiner_prepost_wake(t); // unsets tl_joinable
+               needs_wake = true;
+       }
+
+       // Joinable threads that have no joiner yet are kept on the thread list
+       // so that pthread_join() can later discover the thread when it is joined,
+       // and will have to do the pthread_t cleanup.
+       if (t->tl_joinable) {
+               t->tl_joiner_cleans_up = keep_thread_struct = true;
+       } else {
+               TAILQ_REMOVE(&__pthread_head, t, tl_plist);
+       }
+
+       _PTHREAD_UNLOCK(_pthread_list_lock);
+
+       if (needs_wake) {
+               // When we found a waiter, we want to drop the very contended list lock
+               // before we do the syscall in _pthread_joiner_wake(). Then, we decide
+               // who gets to cleanup the pthread_t between the joiner and the exiting
+               // thread:
+               // - the joiner tries to set tl_join_ctx to NULL
+               // - the exiting thread tries to set tl_joiner_cleans_up to true
+               // Whoever does it first commits the other guy to clean up the pthread_t
+               _pthread_joiner_wake(t);
+               _PTHREAD_LOCK(_pthread_list_lock);
+               if (t->tl_join_ctx) {
+                       t->tl_joiner_cleans_up = true;
+                       keep_thread_struct = true;
                }
+               _PTHREAD_UNLOCK(_pthread_list_lock);
        }
-       return ret;
-}
 
-int
-pthread_attr_getguardsize(const pthread_attr_t *attr, size_t *guardsize)
-{
-       int ret = EINVAL;
-       if (attr->sig == _PTHREAD_ATTR_SIG) {
-               *guardsize = attr->guardsize;
-               ret = 0;
+       //
+       // /!\ dereferencing `t` past this point is not safe /!\
+       //
+
+       if (keep_thread_struct || t == main_thread()) {
+               // Use the adjusted freesize of just the stack that we computed above.
+               freesize = freesize_stack;
+       } else {
+               _pthread_introspection_thread_destroy(t);
        }
-       return ret;
+
+       // Check if there is nothing to free because the thread has a custom
+       // stack allocation and is joinable.
+       if (freesize == 0) {
+               freeaddr = 0;
+       }
+       if (should_exit) {
+               exitf(0);
+       }
+       __bsdthread_terminate((void *)freeaddr, freesize, kport, custom_stack_sema);
+       PTHREAD_INTERNAL_CRASH(t, "thread didn't terminate");
+}
+
+PTHREAD_NORETURN
+static void
+_pthread_terminate_invoke(pthread_t t, void *exit_value)
+{
+#if PTHREAD_T_OFFSET
+       void *p = NULL;
+       // <rdar://problem/25688492> During pthread termination there is a race
+       // between pthread_join and pthread_terminate; if the joiner is responsible
+       // for cleaning up the pthread_t struct, then it may destroy some part of the
+       // stack with it on 16k OSes. So that this doesn't cause _pthread_terminate()
+       // to crash because its stack has been removed from under its feet, just make
+       // sure termination happens in a part of the stack that is not on the same
+       // page as the pthread_t.
+       if (trunc_page((uintptr_t)__builtin_frame_address(0)) ==
+                       trunc_page((uintptr_t)t)) {
+               p = alloca(PTHREAD_T_OFFSET);
+       }
+       // And this __asm__ volatile is needed to stop the compiler from optimising
+       // away the alloca() completely.
+       __asm__ volatile ("" : : "r"(p) );
+#endif
+       _pthread_terminate(t, exit_value);
 }
 
+#pragma mark pthread start / body
 
 /*
  * Create and start execution of a new thread.
@@ -704,51 +845,22 @@ static void
 _pthread_body(pthread_t self, bool needs_tsd_base_set)
 {
        _pthread_set_self_internal(self, needs_tsd_base_set);
-       __pthread_add_thread(self, NULL, false, false);
-       void *result = (self->fun)(self->arg);
-
-       _pthread_exit(self, result);
+       __pthread_started_thread(self);
+       _pthread_exit(self, (self->fun)(self->arg));
 }
 
 PTHREAD_NORETURN
 void
-_pthread_start(pthread_t self,
-              mach_port_t kport,
-              void *(*fun)(void *),
-              void *arg,
-              size_t stacksize,
-              unsigned int pflags)
-{
-       if ((pflags & PTHREAD_START_CUSTOM) == 0) {
-               void *stackaddr = self;
-               _pthread_struct_init(self, &_pthread_attr_default,
-                               stackaddr, stacksize,
-                               PTHREAD_ALLOCADDR(stackaddr, stacksize), PTHREAD_ALLOCSIZE(stackaddr, stacksize));
-
-               if (pflags & PTHREAD_START_SETSCHED) {
-                       self->policy = ((pflags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK);
-                       self->param.sched_priority = (pflags & PTHREAD_START_IMPORTANCE_MASK);
-               }
-
-               if ((pflags & PTHREAD_START_DETACHED) == PTHREAD_START_DETACHED)  {
-                       self->detached &= ~PTHREAD_CREATE_JOINABLE;
-                       self->detached |= PTHREAD_CREATE_DETACHED;
-               }
-       }
-
-       if ((pflags & PTHREAD_START_QOSCLASS) != 0) {
-               /* The QoS class is cached in the TSD of the pthread, so to reflect the
-                * class that the kernel brought us up at, the TSD must be primed from the
-                * flags parameter.
-                */
-               self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (pflags & PTHREAD_START_QOSCLASS_MASK);
-       } else {
-               /* Give the thread a default QoS tier, of zero. */
-               self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
-       }
-
+_pthread_start(pthread_t self, mach_port_t kport,
+               __unused void *(*fun)(void *), __unused void *arg,
+               __unused size_t stacksize, unsigned int pflags)
+{
        bool thread_tsd_bsd_set = (bool)(pflags & PTHREAD_START_TSD_BASE_SET);
 
+       if (os_unlikely(pflags & PTHREAD_START_SUSPENDED)) {
+               PTHREAD_INTERNAL_CRASH(0,
+                               "kernel without PTHREAD_START_SUSPENDED support");
+       }
 #if DEBUG
        PTHREAD_ASSERT(MACH_PORT_VALID(kport));
        PTHREAD_ASSERT(_pthread_kernel_thread(self) == kport);
@@ -756,20 +868,13 @@ _pthread_start(pthread_t self,
        // will mark the thread initialized
        _pthread_markcancel_if_canceled(self, kport);
 
-       self->fun = fun;
-       self->arg = arg;
-
        _pthread_body(self, !thread_tsd_bsd_set);
 }
 
 PTHREAD_ALWAYS_INLINE
 static inline void
-_pthread_struct_init(pthread_t t,
-                    const pthread_attr_t *attrs,
-                    void *stackaddr,
-                    size_t stacksize,
-                    void *freeaddr,
-                    size_t freesize)
+_pthread_struct_init(pthread_t t, const pthread_attr_t *attrs,
+               void *stackaddr, size_t stacksize, void *freeaddr, size_t freesize)
 {
 #if DEBUG
        PTHREAD_ASSERT(t->sig != _PTHREAD_SIG);
@@ -777,23 +882,34 @@ _pthread_struct_init(pthread_t t,
 
        t->sig = _PTHREAD_SIG;
        t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = t;
-       t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
+       t->tsd[_PTHREAD_TSD_SLOT_ERRNO] = &t->err_no;
+       if (attrs->schedset == 0) {
+               t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = attrs->qosclass;
+       } else {
+               t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] =
+                               _pthread_unspecified_priority();
+       }
+       t->tsd[_PTHREAD_TSD_SLOT_PTR_MUNGE] = _pthread_ptr_munge_token;
+       t->tl_has_custom_stack = (attrs->stackaddr != NULL);
+
        _PTHREAD_LOCK_INIT(t->lock);
 
        t->stackaddr = stackaddr;
-       t->stacksize = stacksize;
+       t->stackbottom = stackaddr - stacksize;
        t->freeaddr = freeaddr;
        t->freesize = freesize;
 
-       t->guardsize = attrs->guardsize;
-       t->detached = attrs->detached;
+       t->guardsize = _pthread_attr_guardsize(attrs);
+       t->tl_joinable = (attrs->detached == PTHREAD_CREATE_JOINABLE);
        t->inherit = attrs->inherit;
-       t->policy = attrs->policy;
+       t->tl_policy = attrs->policy;
        t->schedset = attrs->schedset;
-       t->param = attrs->param;
+       _pthread_attr_get_schedparam(attrs, &t->tl_param);
        t->cancel_state = PTHREAD_CANCEL_ENABLE | PTHREAD_CANCEL_DEFERRED;
 }
 
+#pragma mark pthread public interface
+
 /* Need to deprecate this in future */
 int
 _pthread_is_threaded(void)
@@ -818,7 +934,7 @@ mach_port_t
 pthread_mach_thread_np(pthread_t t)
 {
        mach_port_t kport = MACH_PORT_NULL;
-       (void)_pthread_is_valid(t, 0, &kport);
+       (void)_pthread_is_valid(t, &kport);
        return kport;
 }
 
@@ -831,7 +947,7 @@ pthread_from_mach_thread_np(mach_port_t kernel_thread)
        /* No need to wait as mach port is already known */
        _PTHREAD_LOCK(_pthread_list_lock);
 
-       TAILQ_FOREACH(p, &__pthread_head, plist) {
+       TAILQ_FOREACH(p, &__pthread_head, tl_plist) {
                if (_pthread_kernel_thread(p) == kernel_thread) {
                        break;
                }
@@ -847,6 +963,7 @@ size_t
 pthread_get_stacksize_np(pthread_t t)
 {
        size_t size = 0;
+       size_t stacksize = t->stackaddr - t->stackbottom;
 
        if (t == NULL) {
                return ESRCH; // XXX bug?
@@ -863,7 +980,7 @@ pthread_get_stacksize_np(pthread_t t)
        //
        // Of course, on arm rlim_cur == rlim_max and there's only the one guard
        // page.  So, we can skip all this there.
-       if (t == &_thread && t->stacksize + vm_page_size != t->freesize) {
+       if (t == main_thread() && stacksize + vm_page_size != t->freesize) {
                // We want to call getrlimit() just once, as it's relatively expensive
                static size_t rlimit_stack;
 
@@ -877,55 +994,46 @@ pthread_get_stacksize_np(pthread_t t)
                }
 
                if (rlimit_stack == 0 || rlimit_stack > t->freesize) {
-                       return t->stacksize;
+                       return stacksize;
                } else {
                        return rlimit_stack;
                }
        }
 #endif /* !defined(__arm__) && !defined(__arm64__) */
 
-       if (t == pthread_self() || t == &_thread) {
-               return t->stacksize;
+       if (t == pthread_self() || t == main_thread()) {
+               size = stacksize;
+               goto out;
        }
 
-       _PTHREAD_LOCK(_pthread_list_lock);
-
-       if (_pthread_is_valid_locked(t)) {
-               size = t->stacksize;
+       if (_pthread_validate_thread_and_list_lock(t)) {
+               size = stacksize;
+               _PTHREAD_UNLOCK(_pthread_list_lock);
        } else {
                size = ESRCH; // XXX bug?
        }
 
-       _PTHREAD_UNLOCK(_pthread_list_lock);
-
-       return size;
+out:
+       // <rdar://problem/42588315> binary compatibility issues force us to return
+       // DEFAULT_STACK_SIZE here when we do not know the size of the stack
+       return size ? size : DEFAULT_STACK_SIZE;
 }
 
 PTHREAD_NOEXPORT_VARIANT
 void *
 pthread_get_stackaddr_np(pthread_t t)
 {
-       void *addr = NULL;
-
-       if (t == NULL) {
-               return (void *)(uintptr_t)ESRCH; // XXX bug?
-       }
-
        // since the main thread will not get de-allocated from underneath us
-       if (t == pthread_self() || t == &_thread) {
+       if (t == pthread_self() || t == main_thread()) {
                return t->stackaddr;
        }
 
-       _PTHREAD_LOCK(_pthread_list_lock);
-
-       if (_pthread_is_valid_locked(t)) {
-               addr = t->stackaddr;
-       } else {
-               addr = (void *)(uintptr_t)ESRCH; // XXX bug?
+       if (!_pthread_validate_thread_and_list_lock(t)) {
+               return (void *)(uintptr_t)ESRCH; // XXX bug?
        }
 
+       void *addr = t->stackaddr;
        _PTHREAD_UNLOCK(_pthread_list_lock);
-
        return addr;
 }
 
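
Both accessors touched above are public Apple extensions declared in <pthread.h>; a short usage sketch (pthread_get_stackaddr_np() returns the high end of the stack, which grows down, and since this change an unknown size is reported as the default stack size):

#include <pthread.h>
#include <stdio.h>

int main(void)
{
        pthread_t self = pthread_self();
        size_t size = pthread_get_stacksize_np(self);
        void *top = pthread_get_stackaddr_np(self);

        // Stacks grow down: the usable range is [top - size, top).
        printf("stack top %p, size %zu\n", top, size);
        return 0;
}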
@@ -979,32 +1087,30 @@ _pthread_dealloc_special_reply_port(pthread_t t)
 {
        mach_port_t special_reply_port = _pthread_special_reply_port(t);
        if (special_reply_port != MACH_PORT_NULL) {
-               mach_port_mod_refs(mach_task_self(), special_reply_port,
-                               MACH_PORT_RIGHT_RECEIVE, -1);
+               thread_destruct_special_reply_port(special_reply_port,
+                               THREAD_SPECIAL_REPLY_PORT_ALL);
        }
 }
 
 pthread_t
 pthread_main_thread_np(void)
 {
-       return &_thread;
+       return main_thread();
 }
 
 /* returns non-zero if the current thread is the main thread */
 int
 pthread_main_np(void)
 {
-       pthread_t self = pthread_self();
-
-       return ((self->detached & _PTHREAD_CREATE_PARENT) == _PTHREAD_CREATE_PARENT);
+       return pthread_self() == main_thread();
 }
 
 
-/* if we are passed in a pthread_t that is NULL, then we return
-   the current thread's thread_id. So folks don't have to call
-   pthread_self, in addition to us doing it, if they just want
-   their thread_id.
-*/
+/*
+ * if we are passed in a pthread_t that is NULL, then we return the current
+ * thread's thread_id. So folks don't have to call pthread_self, in addition to
+ * us doing it, if they just want their thread_id.
+ */
 PTHREAD_NOEXPORT_VARIANT
 int
 pthread_threadid_np(pthread_t thread, uint64_t *thread_id)
@@ -1018,11 +1124,10 @@ pthread_threadid_np(pthread_t thread, uint64_t *thread_id)
 
        if (thread == NULL || thread == self) {
                *thread_id = self->thread_id;
+       } else if (!_pthread_validate_thread_and_list_lock(thread)) {
+               res = ESRCH;
        } else {
-               _PTHREAD_LOCK(_pthread_list_lock);
-               if (!_pthread_is_valid_locked(thread)) {
-                       res = ESRCH;
-               } else if (thread->thread_id == 0) {
+               if (thread->thread_id == 0) {
                        res = EINVAL;
                } else {
                        *thread_id = thread->thread_id;
@@ -1036,20 +1141,18 @@ PTHREAD_NOEXPORT_VARIANT
 int
 pthread_getname_np(pthread_t thread, char *threadname, size_t len)
 {
-       int res = 0;
+       if (thread == pthread_self()) {
+               strlcpy(threadname, thread->pthread_name, len);
+               return 0;
+       }
 
-       if (thread == NULL) {
+       if (!_pthread_validate_thread_and_list_lock(thread)) {
                return ESRCH;
        }
 
-       _PTHREAD_LOCK(_pthread_list_lock);
-       if (_pthread_is_valid_locked(thread)) {
-               strlcpy(threadname, thread->pthread_name, len);
-       } else {
-               res = ESRCH;
-       }
+       strlcpy(threadname, thread->pthread_name, len);
        _PTHREAD_UNLOCK(_pthread_list_lock);
-       return res;
+       return 0;
 }
 
 
@@ -1079,219 +1182,122 @@ pthread_setname_np(const char *name)
 
 PTHREAD_ALWAYS_INLINE
 static inline void
-__pthread_add_thread(pthread_t t, const pthread_attr_t *attrs,
-               bool parent, bool from_mach_thread)
+__pthread_add_thread(pthread_t t, bool from_mach_thread)
 {
 {
-       bool should_deallocate = false;
-       bool should_add = true;
-
-       mach_port_t kport = _pthread_kernel_thread(t);
-       if (os_slowpath(!MACH_PORT_VALID(kport))) {
-               PTHREAD_CLIENT_CRASH(kport,
-                               "Unable to allocate thread port, possible port leak");
-       }
-
        if (from_mach_thread) {
                _PTHREAD_LOCK_FROM_MACH_THREAD(_pthread_list_lock);
        } else {
                _PTHREAD_LOCK(_pthread_list_lock);
        }
 
-       // The parent and child threads race to add the thread to the list.
-       // When called by the parent:
-       //  - set parentcheck to true
-       //  - back off if childrun is true
-       // When called by the child:
-       //  - set childrun to true
-       //  - back off if parentcheck is true
-       if (parent) {
-               t->parentcheck = 1;
-               if (t->childrun) {
-                       // child got here first, don't add.
-                       should_add = false;
-               }
+       TAILQ_INSERT_TAIL(&__pthread_head, t, tl_plist);
+       _pthread_count++;
 
-               // If the child exits before we check in then it has to keep
-               // the thread structure memory alive so our dereferences above
-               // are valid. If it's a detached thread, then no joiner will
-               // deallocate the thread structure itself. So we do it here.
-               if (t->childexit) {
-                       should_add = false;
-                       should_deallocate = ((t->detached & PTHREAD_CREATE_DETACHED) == PTHREAD_CREATE_DETACHED);
-               }
+       if (from_mach_thread) {
+               _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock);
        } else {
        } else {
-               t->childrun = 1;
-               if (t->parentcheck) {
-                       // Parent got here first, don't add.
-                       should_add = false;
-               }
-               if (t->wqthread) {
-                       // Work queue threads have no parent. Simulate.
-                       t->parentcheck = 1;
-               }
+               _PTHREAD_UNLOCK(_pthread_list_lock);
        }
 
        }
 
-       if (should_add) {
-               TAILQ_INSERT_TAIL(&__pthread_head, t, plist);
-               _pthread_count++;
-
-               /*
-                * Set some initial values which we know in the pthread structure in
-                * case folks try to get the values before the thread can set them.
-                */
-               if (parent && attrs && attrs->schedset == 0) {
-                       t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = attrs->qosclass;
-               }
+       if (!from_mach_thread) {
+               // PR-26275485: Mach threads will likely crash trying to run
+               // introspection code.  Since the fall out from the introspection
+               // code not seeing the injected thread is likely less than crashing
+               // in the introspection code, just don't make the call.
+               _pthread_introspection_thread_create(t);
        }
+}
 
-       if (from_mach_thread){
-               _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock);
+PTHREAD_ALWAYS_INLINE
+static inline void
+__pthread_undo_add_thread(pthread_t t, bool from_mach_thread)
+{
+       if (from_mach_thread) {
+               _PTHREAD_LOCK_FROM_MACH_THREAD(_pthread_list_lock);
        } else {
        } else {
-               _PTHREAD_UNLOCK(_pthread_list_lock);
+               _PTHREAD_LOCK(_pthread_list_lock);
        }
 
        }
 
-       if (parent) {
-               if (!from_mach_thread) {
-                       // PR-26275485: Mach threads will likely crash trying to run
-                       // introspection code.  Since the fall out from the introspection
-                       // code not seeing the injected thread is likely less than crashing
-                       // in the introspection code, just don't make the call.
-                       _pthread_introspection_thread_create(t, should_deallocate);
-               }
-               if (should_deallocate) {
-                       _pthread_deallocate(t);
-               }
+       TAILQ_REMOVE(&__pthread_head, t, tl_plist);
+       _pthread_count--;
+
+       if (from_mach_thread) {
+               _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock);
        } else {
        } else {
-               _pthread_introspection_thread_start(t);
+               _PTHREAD_UNLOCK(_pthread_list_lock);
        }
 }
 
        }
-// Returns EBUSY if the thread structure should be kept alive (is joinable).
-// Returns ESRCH if the thread structure is no longer valid (was detached).
 PTHREAD_ALWAYS_INLINE
-static inline int
-__pthread_remove_thread(pthread_t t, bool child, bool *should_exit)
+static inline void
+__pthread_started_thread(pthread_t t)
 {
-       int ret = 0;
-
-       bool should_remove = true;
-
-       _PTHREAD_LOCK(_pthread_list_lock);
-
-       // When a thread removes itself:
-       //  - Set the childexit flag indicating that the thread has exited.
-       //  - Return false if parentcheck is zero (must keep structure)
-       //  - If the thread is joinable, keep it on the list so that
-       //    the join operation succeeds. Still decrement the running
-       //    thread count so that we exit if no threads are running.
-       //  - Update the running thread count.
-       // When another thread removes a joinable thread:
-       //  - CAREFUL not to dereference the thread before verifying that the
-       //    reference is still valid using _pthread_is_valid_locked().
-       //  - Remove the thread from the list.
-
-       if (child) {
-               t->childexit = 1;
-               if (t->parentcheck == 0) {
-                       ret = EBUSY;
-               }
-               if ((t->detached & PTHREAD_CREATE_JOINABLE) != 0) {
-                       ret = EBUSY;
-                       should_remove = false;
-               }
-               *should_exit = (--_pthread_count <= 0);
-       } else if (!_pthread_is_valid_locked(t)) {
-               ret = ESRCH;
-               should_remove = false;
-       } else if ((t->detached & PTHREAD_CREATE_JOINABLE) == 0) {
-               // If we found a thread but it's not joinable, bail.
-               ret = ESRCH;
-               should_remove = false;
-       } else if (t->parentcheck == 0) {
-               // If we're not the child thread *and* the parent has not finished
-               // creating the thread yet, then we are another thread that's joining
-               // and we cannot deallocate the pthread.
-               ret = EBUSY;
-       }
-       if (should_remove) {
-               TAILQ_REMOVE(&__pthread_head, t, plist);
+       mach_port_t kport = _pthread_kernel_thread(t);
+       if (os_slowpath(!MACH_PORT_VALID(kport))) {
+               PTHREAD_CLIENT_CRASH(kport,
+                               "Unable to allocate thread port, possible port leak");
        }
-
-       _PTHREAD_UNLOCK(_pthread_list_lock);
-
-       return ret;
+       _pthread_introspection_thread_start(t);
 }
 
+#define _PTHREAD_CREATE_NONE              0x0
+#define _PTHREAD_CREATE_FROM_MACH_THREAD  0x1
+#define _PTHREAD_CREATE_SUSPENDED         0x2
+
 static int
-_pthread_create(pthread_t *thread,
-       const pthread_attr_t *attr,
-       void *(*start_routine)(void *),
-       void *arg,
-       bool from_mach_thread)
+_pthread_create(pthread_t *thread, const pthread_attr_t *attrs,
+               void *(*start_routine)(void *), void *arg, unsigned int create_flags)
 {
        pthread_t t = NULL;
-       unsigned int flags = 0;
+       void *stack = NULL;
+       bool from_mach_thread = (create_flags & _PTHREAD_CREATE_FROM_MACH_THREAD);
 
-       pthread_attr_t *attrs = (pthread_attr_t *)attr;
        if (attrs == NULL) {
                attrs = &_pthread_attr_default;
        } else if (attrs->sig != _PTHREAD_ATTR_SIG) {
                return EINVAL;
        }
 
-       if (attrs->detached == PTHREAD_CREATE_DETACHED) {
-               flags |= PTHREAD_START_DETACHED;
-       }
-
+       unsigned int flags = PTHREAD_START_CUSTOM;
        if (attrs->schedset != 0) {
+               struct sched_param p;
+               _pthread_attr_get_schedparam(attrs, &p);
                flags |= PTHREAD_START_SETSCHED;
                flags |= ((attrs->policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT);
-               flags |= (attrs->param.sched_priority & PTHREAD_START_IMPORTANCE_MASK);
+               flags |= (p.sched_priority & PTHREAD_START_IMPORTANCE_MASK);
        } else if (attrs->qosclass != 0) {
                flags |= PTHREAD_START_QOSCLASS;
                flags |= (attrs->qosclass & PTHREAD_START_QOSCLASS_MASK);
        }
+       if (create_flags & _PTHREAD_CREATE_SUSPENDED) {
+               flags |= PTHREAD_START_SUSPENDED;
+       }
 
        __is_threaded = 1;
 
-       void *stack;
-
-       if (attrs->fastpath) {
-               // kernel will allocate thread and stack, pass stacksize.
-               stack = (void *)attrs->stacksize;
-       } else {
-               // allocate the thread and its stack
-               flags |= PTHREAD_START_CUSTOM;
-
-               int res;
-               res = _pthread_allocate(&t, attrs, &stack);
-               if (res) {
-                       return res;
-               }
-
-               t->arg = arg;
-               t->fun = start_routine;
+       t = _pthread_allocate(attrs, &stack);
+       if (t == NULL) {
+               return EAGAIN;
        }
 
-       pthread_t t2;
-       t2 = __bsdthread_create(start_routine, arg, stack, t, flags);
-       if (t2 == (pthread_t)-1) {
+       t->arg = arg;
+       t->fun = start_routine;
+       __pthread_add_thread(t, from_mach_thread);
+
+       if (__bsdthread_create(start_routine, arg, stack, t, flags) ==
+                       (pthread_t)-1) {
                if (errno == EMFILE) {
                        PTHREAD_CLIENT_CRASH(0,
                                        "Unable to allocate thread port, possible port leak");
                }
-               if (flags & PTHREAD_START_CUSTOM) {
-                       // free the thread and stack if we allocated it
-                       _pthread_deallocate(t);
-               }
+               __pthread_undo_add_thread(t, from_mach_thread);
+               _pthread_deallocate(t, from_mach_thread);
                return EAGAIN;
        }
-       if (t == NULL) {
-               t = t2;
-       }
 
-       __pthread_add_thread(t, attrs, true, from_mach_thread);
+       if (create_flags & _PTHREAD_CREATE_SUSPENDED) {
+               _pthread_markcancel_if_canceled(t, _pthread_kernel_thread(t));
+       }
 
        // n.b. if a thread is created detached and exits, t will be invalid
        *thread = t;
 
@@ -1299,78 +1305,87 @@ _pthread_create(pthread_t *thread,
 }
 
 int
-pthread_create(pthread_t *thread,
-       const pthread_attr_t *attr,
-       void *(*start_routine)(void *),
-       void *arg)
+pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+               void *(*start_routine)(void *), void *arg)
 {
-       return _pthread_create(thread, attr, start_routine, arg, false);
+       unsigned int flags = _PTHREAD_CREATE_NONE;
+       return _pthread_create(thread, attr, start_routine, arg, flags);
 }
 
 int
 }
 
 int
-pthread_create_from_mach_thread(pthread_t *thread,
-       const pthread_attr_t *attr,
-       void *(*start_routine)(void *),
-       void *arg)
+pthread_create_from_mach_thread(pthread_t *thread, const pthread_attr_t *attr,
+               void *(*start_routine)(void *), void *arg)
 {
-       return _pthread_create(thread, attr, start_routine, arg, true);
+       unsigned int flags = _PTHREAD_CREATE_FROM_MACH_THREAD;
+       return _pthread_create(thread, attr, start_routine, arg, flags);
 }
 
+#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288
+/* Functions defined in machine-dependent files. */
+PTHREAD_NOEXPORT void _pthread_setup_suspended(pthread_t th, void (*f)(pthread_t), void *sp);
+
 PTHREAD_NORETURN
 static void
 _pthread_suspended_body(pthread_t self)
 {
        _pthread_set_self(self);
-       __pthread_add_thread(self, NULL, false, false);
+       __pthread_started_thread(self);
        _pthread_exit(self, (self->fun)(self->arg));
 }
 
-int
-pthread_create_suspended_np(pthread_t *thread,
-       const pthread_attr_t *attr,
-       void *(*start_routine)(void *),
-       void *arg)
+static int
+_pthread_create_suspended_np(pthread_t *thread, const pthread_attr_t *attrs,
+               void *(*start_routine)(void *), void *arg)
 {
 {
-       int res;
+       pthread_t t;
        void *stack;
        mach_port_t kernel_thread = MACH_PORT_NULL;
 
-       const pthread_attr_t *attrs = attr;
        if (attrs == NULL) {
                attrs = &_pthread_attr_default;
        } else if (attrs->sig != _PTHREAD_ATTR_SIG) {
                return EINVAL;
        }
 
-       pthread_t t;
-       res = _pthread_allocate(&t, attrs, &stack);
-       if (res) {
-               return res;
+       t = _pthread_allocate(attrs, &stack);
+       if (t == NULL) {
+               return EAGAIN;
        }
 
-       *thread = t;
-
-       kern_return_t kr;
-       kr = thread_create(mach_task_self(), &kernel_thread);
-       if (kr != KERN_SUCCESS) {
-               //PTHREAD_ABORT("thread_create() failed: %d", kern_res);
-               return EINVAL; /* Need better error here? */
+       if (thread_create(mach_task_self(), &kernel_thread) != KERN_SUCCESS) {
+               _pthread_deallocate(t, false);
+               return EAGAIN;
        }
 
        _pthread_set_kernel_thread(t, kernel_thread);
-       (void)pthread_setschedparam_internal(t, kernel_thread, t->policy, &t->param);
+       (void)pthread_setschedparam_internal(t, kernel_thread,
+                       t->tl_policy, &t->tl_param);
 
        __is_threaded = 1;
 
        t->arg = arg;
        t->fun = start_routine;
-
        t->cancel_state |= _PTHREAD_CANCEL_INITIALIZED;
-       __pthread_add_thread(t, NULL, true, false);
+       __pthread_add_thread(t, false);
 
        // Set up a suspended thread.
-       _pthread_setup(t, _pthread_suspended_body, stack, 1, 0);
-       return res;
+       _pthread_setup_suspended(t, _pthread_suspended_body, stack);
+       *thread = t;
+       return 0;
+}
+#endif // !defined(__OPEN_SOURCE__) && TARGET_OS_OSX
+
+int
+pthread_create_suspended_np(pthread_t *thread, const pthread_attr_t *attr,
+               void *(*start_routine)(void *), void *arg)
+{
+#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288
+       if (_os_xbs_chrooted) {
+               return _pthread_create_suspended_np(thread, attr, start_routine, arg);
+       }
+#endif
+       unsigned int flags = _PTHREAD_CREATE_SUSPENDED;
+       return _pthread_create(thread, attr, start_routine, arg, flags);
 }
 
 
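
For reference, the public entry point rerouted above keeps its documented behavior: the new thread exists but does not run until its Mach thread is resumed. A small usage sketch:

#include <pthread.h>
#include <mach/mach.h>
#include <stdio.h>

static void *worker(void *arg)
{
        puts("worker running");
        return arg;
}

int main(void)
{
        pthread_t t;
        if (pthread_create_suspended_np(&t, NULL, worker, NULL) != 0) {
                return 1;
        }
        // The thread stays suspended, so state it will read can be
        // published here before letting it run.
        thread_resume(pthread_mach_thread_np(t));
        pthread_join(t, NULL);
        return 0;
}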
@@ -1379,33 +1394,31 @@ int
 pthread_detach(pthread_t thread)
 {
        int res = 0;
-       bool join = false;
-       semaphore_t sema = SEMAPHORE_NULL;
+       bool join = false, wake = false;
 
 
-       if (!_pthread_is_valid(thread, PTHREAD_IS_VALID_LOCK_THREAD, NULL)) {
-               return ESRCH; // Not a valid thread to detach.
+       if (!_pthread_validate_thread_and_list_lock(thread)) {
+               return ESRCH;
        }
 
-       if ((thread->detached & PTHREAD_CREATE_DETACHED) ||
-                       !(thread->detached & PTHREAD_CREATE_JOINABLE)) {
+       if (!thread->tl_joinable) {
                res = EINVAL;
                res = EINVAL;
-       } else if (thread->detached & _PTHREAD_EXITED) {
+       } else if (thread->tl_exit_gate == MACH_PORT_DEAD) {
                // Join the thread if it's already exited.
                join = true;
        } else {
                // Join the thread if it's already exited.
                join = true;
        } else {
-               thread->detached &= ~PTHREAD_CREATE_JOINABLE;
-               thread->detached |= PTHREAD_CREATE_DETACHED;
-               sema = thread->joiner_notify;
+               thread->tl_joinable = false; // _pthread_joiner_prepost_wake uses this
+               if (thread->tl_join_ctx) {
+                       (void)_pthread_joiner_prepost_wake(thread);
+                       wake = true;
+               }
        }
        }
-
-       _PTHREAD_UNLOCK(thread->lock);
+       _PTHREAD_UNLOCK(_pthread_list_lock);
 
        if (join) {
                pthread_join(thread, NULL);
-       } else if (sema) {
-               semaphore_signal(sema);
+       } else if (wake) {
+               _pthread_joiner_wake(thread);
        }
-
        return res;
 }
 
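
The rewritten pthread_detach() above preserves a subtle behavior: detaching a joinable thread that has already exited reclaims it on the spot via pthread_join(). A usage sketch (the sleep is a timing shortcut for brevity, not a recommended synchronization):

#include <pthread.h>
#include <unistd.h>

static void *work(void *arg)
{
        return arg;  // exits immediately, still joinable
}

int main(void)
{
        pthread_t t;
        pthread_create(&t, NULL, work, NULL);
        sleep(1);           // let the thread exit while joinable
        pthread_detach(t);  // already exited: reclaimed immediately
        return 0;
}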
@@ -1418,7 +1431,7 @@ pthread_kill(pthread_t th, int sig)
        }
 
        mach_port_t kport = MACH_PORT_NULL;
-       if (!_pthread_is_valid(th, 0, &kport)) {
+       if (!_pthread_is_valid(th, &kport)) {
                return ESRCH; // Not a valid thread.
        }
 
@@ -1452,18 +1465,19 @@ __pthread_workqueue_setkill(int enable)
 /* For compatibility... */
 
 pthread_t
-_pthread_self(void) {
+_pthread_self(void)
+{
        return pthread_self();
 }
 
 /*
  * Terminate a thread.
  */
-int __disable_threadsignal(int);
+extern int __disable_threadsignal(int);
 
 PTHREAD_NORETURN
 static void
-_pthread_exit(pthread_t self, void *value_ptr)
+_pthread_exit(pthread_t self, void *exit_value)
 {
        struct __darwin_pthread_handler_rec *handler;
 
@@ -1471,7 +1485,7 @@ _pthread_exit(pthread_t self, void *value_ptr)
        __disable_threadsignal(1);
 
        // Set cancel state to disable and type to deferred
-       _pthread_setcancelstate_exit(self, value_ptr, __unix_conforming);
+       _pthread_setcancelstate_exit(self, exit_value);
 
        while ((handler = self->__cleanup_stack) != 0) {
                (handler->__routine)(handler->__arg);
 
        }
        _pthread_tsd_cleanup(self);
 
        }
        _pthread_tsd_cleanup(self);
 
-       _PTHREAD_LOCK(self->lock);
-       self->detached |= _PTHREAD_EXITED;
-       self->exit_value = value_ptr;
-
-       if ((self->detached & PTHREAD_CREATE_JOINABLE) &&
-                       self->joiner_notify == SEMAPHORE_NULL) {
-               self->joiner_notify = (semaphore_t)os_get_cached_semaphore();
-       }
-       _PTHREAD_UNLOCK(self->lock);
-
        // Clear per-thread semaphore cache
        os_put_cached_semaphore(SEMAPHORE_NULL);
 
-       _pthread_terminate_invoke(self);
+       _pthread_terminate_invoke(self, exit_value);
 }
 
 void
-pthread_exit(void *value_ptr)
+pthread_exit(void *exit_value)
 {
        pthread_t self = pthread_self();
 {
        pthread_t self = pthread_self();
-       if (self->wqthread == 0) {
-               _pthread_exit(self, value_ptr);
-       } else {
-               PTHREAD_ABORT("pthread_exit() may only be called against threads created via pthread_create()");
+       if (os_unlikely(self->wqthread)) {
+               PTHREAD_CLIENT_CRASH(0, "pthread_exit() called from a thread "
+                               "not created by pthread_create()");
        }
        }
+       _pthread_exit(self, exit_value);
 }
 
 
 PTHREAD_NOEXPORT_VARIANT
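
The handler loop that _pthread_exit() walks above is what runs blocks registered with pthread_cleanup_push(); a small demonstration:

#include <pthread.h>
#include <stdlib.h>

static void release(void *p)
{
        free(p);
}

static void *worker(void *arg)
{
        (void)arg;
        void *buf = malloc(64);
        pthread_cleanup_push(release, buf);
        pthread_exit(NULL);      // release(buf) runs on the way out
        pthread_cleanup_pop(0);  // unreachable; balances the push
        return NULL;
}

int main(void)
{
        pthread_t t;
        pthread_create(&t, NULL, worker, NULL);
        pthread_join(t, NULL);
        return 0;
}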
 int
 }
 
 
 PTHREAD_NOEXPORT_VARIANT
 int
-pthread_getschedparam(pthread_t thread,
-                     int *policy,
-                     struct sched_param *param)
+pthread_getschedparam(pthread_t thread, int *policy, struct sched_param *param)
 {
 {
-       int ret = 0;
-
-       if (thread == NULL) {
+       if (!_pthread_validate_thread_and_list_lock(thread)) {
                return ESRCH;
        }
 
-       _PTHREAD_LOCK(_pthread_list_lock);
-
-       if (_pthread_is_valid_locked(thread)) {
-               if (policy) {
-                       *policy = thread->policy;
-               }
-               if (param) {
-                       *param = thread->param;
-               }
-       } else {
-               ret = ESRCH;
-       }
-
+       if (policy) *policy = thread->tl_policy;
+       if (param) *param = thread->tl_param;
        _PTHREAD_UNLOCK(_pthread_list_lock);
-
-       return ret;
+       return 0;
 }
 
 
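
pthread_getschedparam() as reworked above keeps its POSIX shape; for example:

#include <pthread.h>
#include <stdio.h>

int main(void)
{
        int policy;
        struct sched_param param;

        if (pthread_getschedparam(pthread_self(), &policy, &param) == 0) {
                printf("policy %d, priority %d\n",
                                policy, param.sched_priority);
        }
        return 0;
}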
+
 PTHREAD_ALWAYS_INLINE
 static inline int
-pthread_setschedparam_internal(pthread_t thread,
-                     mach_port_t kport,
-                     int policy,
-                     const struct sched_param *param)
+pthread_setschedparam_internal(pthread_t thread, mach_port_t kport, int policy,
+               const struct sched_param *param)
 {
        policy_base_data_t bases;
        policy_base_t base;
@@ -1575,41 +1562,37 @@ pthread_setschedparam_internal(pthread_t thread,
        return (ret != KERN_SUCCESS) ? EINVAL : 0;
 }
 
-
 PTHREAD_NOEXPORT_VARIANT
 int
 pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param)
 {
        mach_port_t kport = MACH_PORT_NULL;
-       int res;
        int bypass = 1;
 
        // since the main thread will not get de-allocated from underneath us
-       if (t == pthread_self() || t == &_thread) {
+       if (t == pthread_self() || t == main_thread()) {
                kport = _pthread_kernel_thread(t);
        } else {
                bypass = 0;
-               (void)_pthread_is_valid(t, 0, &kport);
+               if (!_pthread_is_valid(t, &kport)) {
+                       return ESRCH;
+               }
        }
 
-       res = pthread_setschedparam_internal(t, kport, policy, param);
-       if (res == 0) {
-               if (bypass == 0) {
-                       // Ensure the thread is still valid.
-                       _PTHREAD_LOCK(_pthread_list_lock);
-                       if (_pthread_is_valid_locked(t)) {
-                               t->policy = policy;
-                               t->param = *param;
-                       } else {
-                               res = ESRCH;
-                       }
-                       _PTHREAD_UNLOCK(_pthread_list_lock);
-               }  else {
-                       t->policy = policy;
-                       t->param = *param;
-               }
+       int res = pthread_setschedparam_internal(t, kport, policy, param);
+       if (res) return res;
+
+       if (bypass) {
+               _PTHREAD_LOCK(_pthread_list_lock);
+       } else if (!_pthread_validate_thread_and_list_lock(t)) {
+               // Ensure the thread is still valid.
+               return ESRCH;
        }
        }
-       return res;
+
+       t->tl_policy = policy;
+       t->tl_param = *param;
+       _PTHREAD_UNLOCK(_pthread_list_lock);
+       return 0;
 }
 
 
 }
 
 
@@ -1639,25 +1622,49 @@ PTHREAD_NOINLINE
 void
 _pthread_set_self(pthread_t p)
 {
 void
 _pthread_set_self(pthread_t p)
 {
-       return _pthread_set_self_internal(p, true);
+#if VARIANT_DYLD
+       if (os_likely(!p)) {
+               return _pthread_set_self_dyld();
+       }
+#endif // VARIANT_DYLD
+       _pthread_set_self_internal(p, true);
 }
 
 }
 
-PTHREAD_ALWAYS_INLINE
-static inline void
-_pthread_set_self_internal(pthread_t p, bool needs_tsd_base_set)
+#if VARIANT_DYLD
+// _pthread_set_self_dyld is noinline+noexport to allow the option for
+// static libsyscall to adopt this as the entry point from mach_init if
+// desired
+PTHREAD_NOINLINE PTHREAD_NOEXPORT
+void
+_pthread_set_self_dyld(void)
 {
 {
-       if (p == NULL) {
-               p = &_thread;
-       }
+       pthread_t p = main_thread();
+       p->thread_id = __thread_selfid();
 
-       uint64_t tid = __thread_selfid();
-       if (tid == -1ull) {
-               PTHREAD_ABORT("failed to set thread_id");
+       if (os_unlikely(p->thread_id == -1ull)) {
+               PTHREAD_INTERNAL_CRASH(0, "failed to set thread_id");
        }
 
+       // <rdar://problem/40930651> pthread self and the errno address are the
+       // bare minimium TSD setup that dyld needs to actually function.  Without
+       // this, TSD access will fail and crash if it uses bits of Libc prior to
+       // library initialization. __pthread_init will finish the initialization
+       // during library init.
        p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = p;
        p->tsd[_PTHREAD_TSD_SLOT_ERRNO] = &p->err_no;
-       p->thread_id = tid;
+       _thread_set_tsd_base(&p->tsd[0]);
+}
+#endif // VARIANT_DYLD
+
+PTHREAD_ALWAYS_INLINE
+static inline void
+_pthread_set_self_internal(pthread_t p, bool needs_tsd_base_set)
+{
+       p->thread_id = __thread_selfid();
+
+       if (os_unlikely(p->thread_id == -1ull)) {
+               PTHREAD_INTERNAL_CRASH(0, "failed to set thread_id");
+       }
 
        if (needs_tsd_base_set) {
                _thread_set_tsd_base(&p->tsd[0]);
@@ -1719,6 +1726,32 @@ pthread_setconcurrency(int new_level)
        return 0;
 }
 
+#if !defined(VARIANT_STATIC)
+void *
+malloc(size_t sz)
+{
+       if (_pthread_malloc) {
+               return _pthread_malloc(sz);
+       } else {
+               return NULL;
+       }
+}
+
+void
+free(void *p)
+{
+       if (_pthread_free) {
+               _pthread_free(p);
+       }
+}
+#endif // VARIANT_STATIC
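+
The wrappers above exist because libpthread initializes before the real allocator is usable; the host library pushes allocator entry points down at init time (compare the pthread_funcs parameter of __pthread_init() below). A sketch of the pattern; the struct layout is meant to mirror struct _libpthread_functions from private/private.h, so verify against that header:

#include <stddef.h>

struct _libpthread_functions {
        unsigned long version;
        void (*exit)(int);         // version >= 1
        void *(*malloc)(size_t);   // version >= 2
        void (*free)(void *);      // version >= 2
};

static void *(*_pthread_malloc)(size_t);
static void (*_pthread_free)(void *);

// Until this runs, the local malloc() wrapper returns NULL.
static void adopt_pushed_down_functions(
                const struct _libpthread_functions *fns)
{
        if (fns && fns->version >= 2) {
                _pthread_malloc = fns->malloc;
                _pthread_free = fns->free;
        }
}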
+
+/*
+ * Perform package initialization - called automatically when application starts
+ */
+struct ProgramVars; /* forward reference */
+
+#if !VARIANT_DYLD
 static unsigned long
 _pthread_strtoul(const char *p, const char **endptr, int base)
 {
@@ -1777,36 +1810,29 @@ out:
        return ret;
 }
 
-#if !defined(VARIANT_STATIC)
-void *
-malloc(size_t sz)
+static void
+parse_ptr_munge_params(const char *envp[], const char *apple[])
 {
 {
-       if (_pthread_malloc) {
-               return _pthread_malloc(sz);
-       } else {
-               return NULL;
+       const char *p, *s;
+       p = _simple_getenv(apple, "ptr_munge");
+       if (p) {
+               _pthread_ptr_munge_token = _pthread_strtoul(p, &s, 16);
+               bzero((char *)p, strlen(p));
        }
        }
-}
-
-void
-free(void *p)
-{
-       if (_pthread_free) {
-               _pthread_free(p);
+#if !DEBUG
+       if (_pthread_ptr_munge_token) return;
+#endif
+       p = _simple_getenv(envp, "PTHREAD_PTR_MUNGE_TOKEN");
+       if (p) {
+               uintptr_t t = _pthread_strtoul(p, &s, 16);
+               if (t) _pthread_ptr_munge_token = t;
        }
 }
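
The token parsed here feeds pointer obfuscation of the kind glibc calls PTR_MANGLE: stored pointers are combined with a per-process secret so a leaked value cannot be used directly. A purely illustrative sketch, not the exact transform libpthread applies:

#include <stdint.h>

static uintptr_t munge_token;  // seeded from the apple[] "ptr_munge" key

static inline uintptr_t ptr_munge(uintptr_t ptr)
{
        // XOR is self-inverse: applying it twice recovers the pointer,
        // but the stored value is useless without the token.
        return ptr ^ munge_token;
}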
-#endif // VARIANT_STATIC
-
-/*
- * Perform package initialization - called automatically when application starts
- */
-struct ProgramVars; /* forward reference */
 
 int
 __pthread_init(const struct _libpthread_functions *pthread_funcs,
-              const char *envp[] __unused,
-              const char *apple[],
-              const struct ProgramVars *vars __unused)
+               const char *envp[], const char *apple[],
+               const struct ProgramVars *vars __unused)
 {
        // Save our provided pushed-down functions
        if (pthread_funcs) {
@@ -1829,11 +1855,11 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs,
        host_t host = mach_host_self();
        kr = host_info(host, flavor, (host_info_t)&priority_info, &count);
        if (kr != KERN_SUCCESS) {
-               PTHREAD_ABORT("host_info(mach_host_self(), ...) failed: %s", mach_error_string(kr));
+               PTHREAD_INTERNAL_CRASH(kr, "host_info() failed");
        } else {
-               default_priority = priority_info.user_priority;
-               min_priority = priority_info.minimum_priority;
-               max_priority = priority_info.maximum_priority;
+               default_priority = (uint8_t)priority_info.user_priority;
+               min_priority = (uint8_t)priority_info.minimum_priority;
+               max_priority = (uint8_t)priority_info.maximum_priority;
        }
        mach_port_deallocate(mach_task_self(), host);
 
@@ -1863,12 +1889,22 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs,
                allocsize = 0;
        }
 
-       pthread_t thread = &_thread;
-       pthread_attr_init(&_pthread_attr_default);
+       // Initialize random ptr_munge token from the kernel.
+       parse_ptr_munge_params(envp, apple);
+
+       // libpthread.a in dyld "owns" the main thread structure itself and sets
+       // up the tsd to point to it. So take the pthread_self() from there
+       // and make it our main thread point.
+       pthread_t thread = (pthread_t)_pthread_getspecific_direct(
+                       _PTHREAD_TSD_SLOT_PTHREAD_SELF);
+       PTHREAD_ASSERT(thread);
+       _main_thread_ptr = thread;
+
+       PTHREAD_ASSERT(_pthread_attr_default.qosclass ==
+                       _pthread_default_priority(0));
        _pthread_struct_init(thread, &_pthread_attr_default,
-                            stackaddr, stacksize,
-                            allocaddr, allocsize);
-       thread->detached = PTHREAD_CREATE_JOINABLE;
+                       stackaddr, stacksize, allocaddr, allocsize);
+       thread->tl_joinable = true;
 
        // Finish initialization with common code that is reinvoked on the
        // child side of a fork.
@@ -1897,66 +1933,35 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs,
 
        return 0;
 }
+#endif // !VARIANT_DYLD
 
 PTHREAD_NOEXPORT void
 _pthread_main_thread_init(pthread_t p)
 {
        TAILQ_INIT(&__pthread_head);
        _PTHREAD_LOCK_INIT(_pthread_list_lock);
-
-       // Re-use the main thread's static storage if no thread was provided.
-       if (p == NULL) {
-               if (_thread.tsd[0] != 0) {
-                       bzero(&_thread, sizeof(struct _pthread));
-               }
-               p = &_thread;
-       }
-
        _PTHREAD_LOCK_INIT(p->lock);
        _pthread_set_kernel_thread(p, mach_thread_self());
        _pthread_set_reply_port(p, mach_reply_port());
        p->__cleanup_stack = NULL;
-       p->joiner_notify = SEMAPHORE_NULL;
-       p->joiner = MACH_PORT_NULL;
-       p->detached |= _PTHREAD_CREATE_PARENT;
+       p->tl_join_ctx = NULL;
+       p->tl_exit_gate = MACH_PORT_NULL;
        p->tsd[__TSD_SEMAPHORE_CACHE] = (void*)SEMAPHORE_NULL;
+       p->tsd[__TSD_MACH_SPECIAL_REPLY] = 0;
        p->cancel_state |= _PTHREAD_CANCEL_INITIALIZED;
 
        // Initialize the list of threads with the new main thread.
-       TAILQ_INSERT_HEAD(&__pthread_head, p, plist);
+       TAILQ_INSERT_HEAD(&__pthread_head, p, tl_plist);
        _pthread_count = 1;
 
-       _pthread_set_self(p);
        _pthread_introspection_thread_start(p);
 }
 
-int
-_pthread_join_cleanup(pthread_t thread, void ** value_ptr, int conforming)
-{
-       int ret = __pthread_remove_thread(thread, false, NULL);
-       if (ret != 0 && ret != EBUSY) {
-               // Returns ESRCH if the thread was not created joinable.
-               return ret;
-       }
-
-       if (value_ptr) {
-               *value_ptr = _pthread_get_exit_value(thread, conforming);
-       }
-       _pthread_introspection_thread_destroy(thread);
-       if (ret != EBUSY) {
-               // __pthread_remove_thread returns EBUSY if the parent has not
-               // finished creating the thread (and is still expecting the pthread_t
-               // to be alive).
-               _pthread_deallocate(thread);
-       }
-       return 0;
-}
-
 int
 sched_yield(void)
 {
-    swtch_pri(0);
-    return 0;
+       swtch_pri(0);
+       return 0;
 }
 
 // XXX remove
@@ -1974,22 +1979,25 @@ pthread_yield_np(void)
 
 
 
+// Libsystem knows about this symbol and exports it to libsyscall
 PTHREAD_NOEXPORT_VARIANT
 void
 _pthread_clear_qos_tsd(mach_port_t thread_port)
 {
        if (thread_port == MACH_PORT_NULL || (uintptr_t)_pthread_getspecific_direct(_PTHREAD_TSD_SLOT_MACH_THREAD_SELF) == thread_port) {
                /* Clear the current thread's TSD, that can be done inline. */
-               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0));
+               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS,
+                               _pthread_unspecified_priority());
        } else {
                pthread_t p;
 
                _PTHREAD_LOCK(_pthread_list_lock);
 
        } else {
                pthread_t p;
 
                _PTHREAD_LOCK(_pthread_list_lock);
 
-               TAILQ_FOREACH(p, &__pthread_head, plist) {
+               TAILQ_FOREACH(p, &__pthread_head, tl_plist) {
                        mach_port_t kp = _pthread_kernel_thread(p);
                        if (thread_port == kp) {
                        mach_port_t kp = _pthread_kernel_thread(p);
                        if (thread_port == kp) {
-                               p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
+                               p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] =
+                                               _pthread_unspecified_priority();
                                break;
                        }
                }
                                break;
                        }
                }
@@ -1999,7 +2007,35 @@ _pthread_clear_qos_tsd(mach_port_t thread_port)
 }
 
 
 }
 
 
-/***** pthread workqueue support routines *****/
+#pragma mark pthread/stack_np.h public interface
+
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__arm64__)
+typedef uintptr_t frame_data_addr_t;
+
+struct frame_data {
+       frame_data_addr_t frame_addr_next;
+       frame_data_addr_t ret_addr;
+};
+#else
+#error ********** Unimplemented architecture
+#endif
+
+uintptr_t
+pthread_stack_frame_decode_np(uintptr_t frame_addr, uintptr_t *return_addr)
+{
+       struct frame_data *frame = (struct frame_data *)frame_addr;
+
+       if (return_addr) {
+               *return_addr = (uintptr_t)frame->ret_addr;
+       }
+
+       return (uintptr_t)frame->frame_addr_next;
+}
+
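+
pthread_stack_frame_decode_np() is the new public interface from pthread/stack_np.h; a short caller that walks its own frame-pointer chain:

#include <pthread/stack_np.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uintptr_t frame = (uintptr_t)__builtin_frame_address(0);
        uintptr_t ret = 0;

        // Each call returns the next frame address and fills in the
        // return address stored in the current frame.
        for (int depth = 0; depth < 8 && frame != 0; depth++) {
                frame = pthread_stack_frame_decode_np(frame, &ret);
                printf("frame %d: return address %p\n", depth, (void *)ret);
        }
        return 0;
}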
+
+#pragma mark pthread workqueue support routines
+
 
 PTHREAD_NOEXPORT void
 _pthread_bsdthread_init(struct _pthread_registration_data *data)
@@ -2011,19 +2047,18 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data)
        data->tsd_offset = offsetof(struct _pthread, tsd);
        data->mach_thread_self_offset = __TSD_MACH_THREAD_SELF * sizeof(void *);
 
-       int rv = __bsdthread_register(thread_start,
-                       start_wqthread, (int)PTHREAD_SIZE,
-                       (void*)data, (uintptr_t)sizeof(*data),
-                       data->dispatch_queue_offset);
+       int rv = __bsdthread_register(thread_start, start_wqthread, (int)PTHREAD_SIZE,
+                       (void*)data, (uintptr_t)sizeof(*data), data->dispatch_queue_offset);
 
        if (rv > 0) {
-               if ((rv & PTHREAD_FEATURE_QOS_DEFAULT) == 0) {
-                       PTHREAD_INTERNAL_CRASH(rv,
-                                       "Missing required support for QOS_CLASS_DEFAULT");
-               }
-               if ((rv & PTHREAD_FEATURE_QOS_MAINTENANCE) == 0) {
-                       PTHREAD_INTERNAL_CRASH(rv,
-                                       "Missing required support for QOS_CLASS_MAINTENANCE");
+               int required_features =
+                               PTHREAD_FEATURE_FINEPRIO |
+                               PTHREAD_FEATURE_BSDTHREADCTL |
+                               PTHREAD_FEATURE_SETSELF |
+                               PTHREAD_FEATURE_QOS_MAINTENANCE |
+                               PTHREAD_FEATURE_QOS_DEFAULT;
+               if ((rv & required_features) != required_features) {
+                       PTHREAD_INTERNAL_CRASH(rv, "Missing required kernel support");
                }
                __pthread_supported_features = rv;
        }
@@ -2039,9 +2074,13 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data)
 
        pthread_priority_t main_qos = (pthread_priority_t)data->main_qos;
 
-       if (_pthread_priority_get_qos_newest(main_qos) != QOS_CLASS_UNSPECIFIED) {
+       if (_pthread_priority_thread_qos(main_qos) != THREAD_QOS_UNSPECIFIED) {
                _pthread_set_main_qos(main_qos);
                _pthread_set_main_qos(main_qos);
-               _thread.tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = main_qos;
+               main_thread()->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = main_qos;
+       }
+
+       if (data->stack_addr_hint) {
+               __pthread_stack_hint = data->stack_addr_hint;
        }
 
        if (__libdispatch_workerfunction != NULL) {
@@ -2050,191 +2089,188 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data)
        }
 }
 
-// workqueue entry point from kernel
-PTHREAD_NORETURN
-void
-_pthread_wqthread(pthread_t self, mach_port_t kport, void *stacklowaddr, void *keventlist, int flags, int nkevents)
+PTHREAD_NOINLINE
+static void
+_pthread_wqthread_legacy_worker_wrap(pthread_priority_t pp)
+{
+       /* Old thread priorities are inverted from where we have them in
+        * the new flexible priority scheme. The highest priority is zero,
+        * up to 2, with background at 3.
+        */
+       pthread_workqueue_function_t func = (pthread_workqueue_function_t)__libdispatch_workerfunction;
+       bool overcommit = (pp & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG);
+       int opts = overcommit ? WORKQ_ADDTHREADS_OPTION_OVERCOMMIT : 0;
+
+       switch (_pthread_priority_thread_qos(pp)) {
+       case THREAD_QOS_USER_INITIATED:
+               return (*func)(WORKQ_HIGH_PRIOQUEUE, opts, NULL);
+       case THREAD_QOS_LEGACY:
+               /* B&I builders can't pass a QOS_CLASS_DEFAULT thread to dispatch, for fear of the QoS being
+                * picked up by NSThread (et al) and transported around the system. So change the TSD to
+                * make this thread look like QOS_CLASS_USER_INITIATED even though it will still run as legacy.
+                */
+               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS,
+                               _pthread_priority_make_from_thread_qos(THREAD_QOS_USER_INITIATED, 0, 0));
+               return (*func)(WORKQ_DEFAULT_PRIOQUEUE, opts, NULL);
+       case THREAD_QOS_UTILITY:
+               return (*func)(WORKQ_LOW_PRIOQUEUE, opts, NULL);
+       case THREAD_QOS_BACKGROUND:
+               return (*func)(WORKQ_BG_PRIOQUEUE, opts, NULL);
+       }
+       PTHREAD_INTERNAL_CRASH(pp, "Invalid pthread priority for the legacy interface");
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline pthread_priority_t
+_pthread_wqthread_priority(int flags)
 {
-       PTHREAD_ASSERT(flags & WQ_FLAG_THREAD_NEWSPI);
+       pthread_priority_t pp = 0;
+       thread_qos_t qos;
+
+       if (flags & WQ_FLAG_THREAD_KEVENT) {
+               pp |= _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG;
+       }
+       if (flags & WQ_FLAG_THREAD_EVENT_MANAGER) {
+               return pp | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
+       }
 
-       bool thread_reuse = flags & WQ_FLAG_THREAD_REUSE;
-       bool overcommit = flags & WQ_FLAG_THREAD_OVERCOMMIT;
-       bool kevent = flags & WQ_FLAG_THREAD_KEVENT;
-       bool workloop = (flags & WQ_FLAG_THREAD_WORKLOOP) &&
-                       __libdispatch_workloopfunction != NULL;
-       PTHREAD_ASSERT((!kevent) || (__libdispatch_keventfunction != NULL));
-       PTHREAD_ASSERT(!workloop || kevent);
+       if (flags & WQ_FLAG_THREAD_OVERCOMMIT) {
+               pp |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
+       }
+       if (flags & WQ_FLAG_THREAD_PRIO_QOS) {
+               qos = (thread_qos_t)(flags & WQ_FLAG_THREAD_PRIO_MASK);
+               pp = _pthread_priority_make_from_thread_qos(qos, 0, pp);
+       } else if (flags & WQ_FLAG_THREAD_PRIO_SCHED) {
+               pp |= _PTHREAD_PRIORITY_SCHED_PRI_MASK;
+               pp |= (flags & WQ_FLAG_THREAD_PRIO_MASK);
+       } else {
+               PTHREAD_INTERNAL_CRASH(flags, "Missing priority");
+       }
+       return pp;
+}
 
-       pthread_priority_t priority = 0;
-       unsigned long priority_flags = 0;
+PTHREAD_NOINLINE
+static void
+_pthread_wqthread_setup(pthread_t self, mach_port_t kport, void *stacklowaddr,
+               int flags)
+{
+       void *stackaddr = self;
+       size_t stacksize = (uintptr_t)self - (uintptr_t)stacklowaddr;
 
-       if (overcommit)
-               priority_flags |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
-       if (flags & WQ_FLAG_THREAD_EVENT_MANAGER)
-               priority_flags |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-       if (kevent)
-               priority_flags |= _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG;
+       _pthread_struct_init(self, &_pthread_attr_default, stackaddr, stacksize,
+                       PTHREAD_ALLOCADDR(stackaddr, stacksize),
+                       PTHREAD_ALLOCSIZE(stackaddr, stacksize));
 
-       int thread_class = flags & WQ_FLAG_THREAD_PRIOMASK;
-       priority = _pthread_priority_make_newest(thread_class, 0, priority_flags);
+       _pthread_set_kernel_thread(self, kport);
+       self->wqthread = 1;
+       self->wqkillset = 0;
+       self->tl_joinable = false;
+       self->cancel_state |= _PTHREAD_CANCEL_INITIALIZED;
 
-       if (!thread_reuse) {
-               // New thread created by kernel, needs initialization.
-               void *stackaddr = self;
-               size_t stacksize = (uintptr_t)self - (uintptr_t)stacklowaddr;
+       // Update the running thread count and set childrun bit.
+       bool thread_tsd_base_set = (bool)(flags & WQ_FLAG_THREAD_TSD_BASE_SET);
+       _pthread_set_self_internal(self, !thread_tsd_base_set);
+       __pthread_add_thread(self, false);
+       __pthread_started_thread(self);
+}
 
-               _pthread_struct_init(self, &_pthread_attr_default,
-                                                        stackaddr, stacksize,
-                                                        PTHREAD_ALLOCADDR(stackaddr, stacksize), PTHREAD_ALLOCSIZE(stackaddr, stacksize));
+PTHREAD_NORETURN PTHREAD_NOINLINE
+static void
+_pthread_wqthread_exit(pthread_t self)
+{
+       pthread_priority_t pp;
+       thread_qos_t qos;
 
-               _pthread_set_kernel_thread(self, kport);
-               self->wqthread = 1;
-               self->wqkillset = 0;
-               self->cancel_state |= _PTHREAD_CANCEL_INITIALIZED;
+       pp = (pthread_priority_t)self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS];
+       qos = _pthread_priority_thread_qos(pp);
+       if (qos == THREAD_QOS_UNSPECIFIED || qos > WORKQ_THREAD_QOS_CLEANUP) {
+               // Reset QoS to something low for the cleanup process
+               pp = _pthread_priority_make_from_thread_qos(WORKQ_THREAD_QOS_CLEANUP, 0, 0);
+               self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (void *)pp;
+       }
 
-               // Not a joinable thread.
-               self->detached &= ~PTHREAD_CREATE_JOINABLE;
-               self->detached |= PTHREAD_CREATE_DETACHED;
+       _pthread_exit(self, NULL);
+}
 
-               // Update the running thread count and set childrun bit.
-               bool thread_tsd_base_set = (bool)(flags & WQ_FLAG_THREAD_TSD_BASE_SET);
-               _pthread_set_self_internal(self, !thread_tsd_base_set);
-               _pthread_introspection_thread_create(self, false);
-               __pthread_add_thread(self, NULL, false, false);
+// workqueue entry point from kernel
+void
+_pthread_wqthread(pthread_t self, mach_port_t kport, void *stacklowaddr,
+               void *keventlist, int flags, int nkevents)
+{
+       if ((flags & WQ_FLAG_THREAD_REUSE) == 0) {
+               _pthread_wqthread_setup(self, kport, stacklowaddr, flags);
        }
 
-       // If we're running with fine-grained priority, we also need to
-       // set this thread to have the QoS class provided to use by the kernel
-       if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) {
-               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(thread_class, 0, priority_flags));
+       pthread_priority_t pp;
+       if (flags & WQ_FLAG_THREAD_OUTSIDEQOS) {
+               self->wqoutsideqos = 1;
+               pp = _pthread_priority_make_from_thread_qos(THREAD_QOS_LEGACY, 0,
+                               _PTHREAD_PRIORITY_FALLBACK_FLAG);
+       } else {
+               self->wqoutsideqos = 0;
+               pp = _pthread_wqthread_priority(flags);
        }
 
-#if WQ_DEBUG
-       PTHREAD_ASSERT(self);
-       PTHREAD_ASSERT(self == pthread_self());
-#endif // WQ_DEBUG
+       self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (void *)pp;
 
-       if (workloop) {
+       // avoid spills on the stack; try hard to keep used stack space minimal
+       if (nkevents == WORKQ_EXIT_THREAD_NKEVENT) {
+               goto exit;
+       } else if (flags & WQ_FLAG_THREAD_WORKLOOP) {
                self->fun = (void *(*)(void*))__libdispatch_workloopfunction;
-       } else if (kevent){
+               self->wq_retop = WQOPS_THREAD_WORKLOOP_RETURN;
+               self->wq_kqid_ptr = ((kqueue_id_t *)keventlist - 1);
+               self->arg = keventlist;
+               self->wq_nevents = nkevents;
+       } else if (flags & WQ_FLAG_THREAD_KEVENT) {
                self->fun = (void *(*)(void*))__libdispatch_keventfunction;
+               self->wq_retop = WQOPS_THREAD_KEVENT_RETURN;
+               self->wq_kqid_ptr = NULL;
+               self->arg = keventlist;
+               self->wq_nevents = nkevents;
        } else {
                self->fun = (void *(*)(void*))__libdispatch_workerfunction;
+               self->wq_retop = WQOPS_THREAD_RETURN;
+               self->wq_kqid_ptr = NULL;
+               self->arg = (void *)(uintptr_t)pp;
+               self->wq_nevents = 0;
+               if (os_likely(__workq_newapi)) {
+                       (*__libdispatch_workerfunction)(pp);
+               } else {
+                       _pthread_wqthread_legacy_worker_wrap(pp);
+               }
+               goto just_return;
        }
-       self->arg = (void *)(uintptr_t)thread_class;
-
-       if (kevent && keventlist && nkevents > 0){
-               int errors_out;
-       kevent_errors_retry:
 
-               if (workloop) {
-                       kqueue_id_t kevent_id = *(kqueue_id_t*)((char*)keventlist - sizeof(kqueue_id_t));
-                       kqueue_id_t kevent_id_in = kevent_id;
-                       (__libdispatch_workloopfunction)(&kevent_id, &keventlist, &nkevents);
-                       PTHREAD_ASSERT(kevent_id == kevent_id_in || nkevents == 0);
-                       errors_out = __workq_kernreturn(WQOPS_THREAD_WORKLOOP_RETURN, keventlist, nkevents, 0);
+       if (nkevents > 0) {
+kevent_errors_retry:
+               if (self->wq_retop == WQOPS_THREAD_WORKLOOP_RETURN) {
+                       ((pthread_workqueue_function_workloop_t)self->fun)
+                                       (self->wq_kqid_ptr, &self->arg, &self->wq_nevents);
                } else {
-                       (__libdispatch_keventfunction)(&keventlist, &nkevents);
-                       errors_out = __workq_kernreturn(WQOPS_THREAD_KEVENT_RETURN, keventlist, nkevents, 0);
+                       ((pthread_workqueue_function_kevent_t)self->fun)
+                                       (&self->arg, &self->wq_nevents);
                }
-
-               if (errors_out > 0){
-                       nkevents = errors_out;
+               int rc = __workq_kernreturn(self->wq_retop, self->arg, self->wq_nevents, 0);
+               if (os_unlikely(rc > 0)) {
+                       self->wq_nevents = rc;
                        goto kevent_errors_retry;
-               } else if (errors_out < 0){
-                       PTHREAD_ABORT("kevent return produced an error: %d", errno);
-               }
-               goto thexit;
-    } else if (kevent){
-               if (workloop) {
-                       (__libdispatch_workloopfunction)(0, NULL, NULL);
-                       __workq_kernreturn(WQOPS_THREAD_WORKLOOP_RETURN, NULL, 0, -1);
-               } else {
-                       (__libdispatch_keventfunction)(NULL, NULL);
-                       __workq_kernreturn(WQOPS_THREAD_KEVENT_RETURN, NULL, 0, 0);
                }
-
-               goto thexit;
-    }
-
-       if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) {
-               if (!__workq_newapi) {
-                       /* Old thread priorities are inverted from where we have them in
-                        * the new flexible priority scheme. The highest priority is zero,
-                        * up to 2, with background at 3.
-                        */
-                       pthread_workqueue_function_t func = (pthread_workqueue_function_t)__libdispatch_workerfunction;
-
-                       int opts = overcommit ? WORKQ_ADDTHREADS_OPTION_OVERCOMMIT : 0;
-
-                       if ((__pthread_supported_features & PTHREAD_FEATURE_QOS_DEFAULT) == 0) {
-                               /* Dirty hack to support kernels that don't have QOS_CLASS_DEFAULT. */
-                               switch (thread_class) {
-                                       case QOS_CLASS_USER_INTERACTIVE:
-                                               thread_class = QOS_CLASS_USER_INITIATED;
-                                               break;
-                                       case QOS_CLASS_USER_INITIATED:
-                                               thread_class = QOS_CLASS_DEFAULT;
-                                               break;
-                                       default:
-                                               break;
-                               }
-                       }
-
-                       switch (thread_class) {
-                               /* QOS_CLASS_USER_INTERACTIVE is not currently requested by for old dispatch priority compatibility */
-                               case QOS_CLASS_USER_INITIATED:
-                                       (*func)(WORKQ_HIGH_PRIOQUEUE, opts, NULL);
-                                       break;
-
-                               case QOS_CLASS_DEFAULT:
-                                       /* B&I builders can't pass a QOS_CLASS_DEFAULT thread to dispatch, for fear of the QoS being
-                                        * picked up by NSThread (et al) and transported around the system. So change the TSD to
-                                        * make this thread look like QOS_CLASS_USER_INITIATED even though it will still run as legacy.
-                                        */
-                                       _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(QOS_CLASS_USER_INITIATED, 0, 0));
-                                       (*func)(WORKQ_DEFAULT_PRIOQUEUE, opts, NULL);
-                                       break;
-
-                               case QOS_CLASS_UTILITY:
-                                       (*func)(WORKQ_LOW_PRIOQUEUE, opts, NULL);
-                                       break;
-
-                               case QOS_CLASS_BACKGROUND:
-                                       (*func)(WORKQ_BG_PRIOQUEUE, opts, NULL);
-                                       break;
-
-                               /* Legacy dispatch does not use QOS_CLASS_MAINTENANCE, so no need to handle it here */
-                       }
-
-               } else {
-                       /* "New" API, where dispatch is expecting to be given the thread priority */
-                       (*__libdispatch_workerfunction)(priority);
+               if (os_unlikely(rc < 0)) {
+                       PTHREAD_INTERNAL_CRASH(self->err_no, "kevent (workloop) failed");
                }
        } else {
-               /* We're the new library running on an old kext, so thread_class is really the workq priority. */
-               pthread_workqueue_function_t func = (pthread_workqueue_function_t)__libdispatch_workerfunction;
-               int options = overcommit ? WORKQ_ADDTHREADS_OPTION_OVERCOMMIT : 0;
-               (*func)(thread_class, options, NULL);
-       }
-
-       __workq_kernreturn(WQOPS_THREAD_RETURN, NULL, 0, 0);
-
-thexit:
-       {
-               pthread_priority_t current_priority = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS);
-               if ((current_priority & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) ||
-                       (_pthread_priority_get_qos_newest(current_priority) > WQ_THREAD_CLEANUP_QOS)) {
-                       // Reset QoS to something low for the cleanup process
-                       priority = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0);
-                       _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority);
-               }
+just_return:
+               __workq_kernreturn(self->wq_retop, NULL, 0, 0);
        }
 
-       _pthread_exit(self, NULL);
+exit:
+       _pthread_wqthread_exit(self);
 }
 
-/***** pthread workqueue API for libdispatch *****/
+
+#pragma mark pthread workqueue API for libdispatch
+
 
 _Static_assert(WORKQ_KEVENT_EVENT_BUFFER_LEN == WQ_KEVENT_LIST_LEN,
                "Kernel and userland should agree on the event list size");
 
 _Static_assert(WORKQ_KEVENT_EVENT_BUFFER_LEN == WQ_KEVENT_LIST_LEN,
                "Kernel and userland should agree on the event list size");
@@ -2329,42 +2365,18 @@ pthread_workqueue_addthreads_np(int queue_priority, int options, int numthreads)
        }
 
        pthread_priority_t kp = 0;
+       int compat_priority = queue_priority & WQ_FLAG_THREAD_PRIO_MASK;
+       int flags = 0;
 
-       if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) {
-               /* The new kernel API takes the new QoS class + relative priority style of
-                * priority. This entry point is here for compatibility with old libdispatch
-                * versions (ie. the simulator). We request the corresponding new bracket
-                * from the kernel, then on the way out run all dispatch queues that were
-                * requested.
-                */
-
-               int compat_priority = queue_priority & WQ_FLAG_THREAD_PRIOMASK;
-               int flags = 0;
-
-               /* To make sure the library does not issue more threads to dispatch than
-                * were requested, the total number of active requests is recorded in
-                * __workq_requests.
-                */
-               if (options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) {
-                       flags = _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
-               }
+       if (options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) {
+               flags = _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
+       }
 
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
-               kp = _pthread_qos_class_encode_workqueue(compat_priority, flags);
+       kp = _pthread_qos_class_encode_workqueue(compat_priority, flags);
 #pragma clang diagnostic pop
 
-       } else {
-               /* Running on the old kernel, queue_priority is what we pass directly to
-                * the syscall.
-                */
-               kp = queue_priority & WQ_FLAG_THREAD_PRIOMASK;
-
-               if (options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) {
-                       kp |= WORKQUEUE_OVERCOMMIT;
-               }
-       }
-
        res = __workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, numthreads, (int)kp);
        if (res == -1) {
                res = errno;
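For reference, the legacy call shape this compatibility path continues to serve; the constants come from workqueue_private.h and the values are illustrative:

// request two overcommit worker threads at the default legacy bucket
pthread_workqueue_addthreads_np(WORKQ_DEFAULT_PRIOQUEUE,
		WORKQ_ADDTHREADS_OPTION_OVERCOMMIT, 2);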
@@ -2391,9 +2403,17 @@ _pthread_workqueue_addthreads(int numthreads, pthread_priority_t priority)
                return EPERM;
        }
 
-       if ((__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) == 0) {
-               return ENOTSUP;
-       }
+#if TARGET_OS_OSX
+       // <rdar://problem/37687655> Legacy simulators fail to boot
+       //
+       // Older sims set the deprecated _PTHREAD_PRIORITY_ROOTQUEUE_FLAG wrongly,
+       // which is aliased to _PTHREAD_PRIORITY_SCHED_PRI_FLAG and that XNU
+       // validates and rejects.
+       //
+       // As a workaround, forcefully unset this bit that cannot be set here
+       // anyway.
+       priority &= ~_PTHREAD_PRIORITY_SCHED_PRI_FLAG;
+#endif
 
        res = __workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, numthreads, (int)priority);
        if (res == -1) {
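_pthread_workqueue_addthreads() above takes an already-encoded pthread_priority_t rather than a legacy bucket. A sketch, under the assumption that the encoder and prototype come from libpthread's private qos/workqueue headers:

#include <sys/qos.h>	// qos_class_t, QOS_CLASS_UTILITY

// from the private headers (assumption):
typedef unsigned long pthread_priority_t;
extern pthread_priority_t _pthread_qos_class_encode(qos_class_t, int, unsigned long);
extern int _pthread_workqueue_addthreads(int, pthread_priority_t);

pthread_priority_t pp = _pthread_qos_class_encode(QOS_CLASS_UTILITY, 0, 0);
int rc = _pthread_workqueue_addthreads(4, pp);	// ask for four workers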
@@ -2412,9 +2432,62 @@ _pthread_workqueue_set_event_manager_priority(pthread_priority_t priority)
        return res;
 }
 
        return res;
 }
 
-/*
- * Introspection SPI for libpthread.
- */
+int
+_pthread_workloop_create(uint64_t workloop_id, uint64_t options, pthread_attr_t *attr)
+{
+       struct kqueue_workloop_params params = {
+               .kqwlp_version = sizeof(struct kqueue_workloop_params),
+               .kqwlp_id = workloop_id,
+               .kqwlp_flags = 0,
+       };
+
+       if (!attr) {
+               return EINVAL;
+       }
+
+       if (attr->schedset) {
+               params.kqwlp_flags |= KQ_WORKLOOP_CREATE_SCHED_PRI;
+               params.kqwlp_sched_pri = attr->param.sched_priority;
+       }
+
+       if (attr->policyset) {
+               params.kqwlp_flags |= KQ_WORKLOOP_CREATE_SCHED_POL;
+               params.kqwlp_sched_pol = attr->policy;
+       }
+
+       if (attr->cpupercentset) {
+               params.kqwlp_flags |= KQ_WORKLOOP_CREATE_CPU_PERCENT;
+               params.kqwlp_cpu_percent = attr->cpupercent;
+               params.kqwlp_cpu_refillms = attr->refillms;
+       }
+
+       int res = __kqueue_workloop_ctl(KQ_WORKLOOP_CREATE, 0, &params,
+                       sizeof(params));
+       if (res == -1) {
+               res = errno;
+       }
+       return res;
+}
+
+int
+_pthread_workloop_destroy(uint64_t workloop_id)
+{
+       struct kqueue_workloop_params params = {
+               .kqwlp_version = sizeof(struct kqueue_workloop_params),
+               .kqwlp_id = workloop_id,
+       };
+
+       int res = __kqueue_workloop_ctl(KQ_WORKLOOP_DESTROY, 0, &params,
+                       sizeof(params));
+       if (res == -1) {
+               res = errno;
+       }
+       return res;
+}
+
+
+#pragma mark Introspection SPI for libpthread.
+
 
 static pthread_introspection_hook_t _pthread_introspection_hook;
 
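Stepping back to the workloop SPI added above: a client builds a pthread_attr_t, sets the scheduling fields it wants mirrored into the kqueue workloop, and creates/destroys the workloop by a caller-chosen identifier. The sketch below is illustrative of that contract only; the extern prototypes and all values are assumptions, not code from this change.

#include <stdint.h>
#include <pthread.h>
#include <sched.h>

// private SPI prototypes (assumption; normally from libpthread private headers)
extern int _pthread_workloop_create(uint64_t workloop_id, uint64_t options, pthread_attr_t *attr);
extern int _pthread_workloop_destroy(uint64_t workloop_id);

static void
workloop_example(void)
{
	pthread_attr_t attr;
	struct sched_param param = { .sched_priority = 47 };	// illustrative
	uint64_t wl_id = 0x1234;				// caller-chosen id

	pthread_attr_init(&attr);
	pthread_attr_setschedparam(&attr, &param);	// sets attr->schedset

	if (_pthread_workloop_create(wl_id, 0, &attr) == 0) {
		// ... attach kevents to the workloop, run work ...
		_pthread_workloop_destroy(wl_id);
	}
	pthread_attr_destroy(&attr);
}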
@@ -2428,19 +2501,17 @@ pthread_introspection_hook_install(pthread_introspection_hook_t hook)
 
 PTHREAD_NOINLINE
 static void
-_pthread_introspection_hook_callout_thread_create(pthread_t t, bool destroy)
+_pthread_introspection_hook_callout_thread_create(pthread_t t)
 {
        _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_CREATE, t, t,
                        PTHREAD_SIZE);
-       if (!destroy) return;
-       _pthread_introspection_thread_destroy(t);
 }
 
 static inline void
-_pthread_introspection_thread_create(pthread_t t, bool destroy)
+_pthread_introspection_thread_create(pthread_t t)
 {
        if (os_fastpath(!_pthread_introspection_hook)) return;
-       _pthread_introspection_hook_callout_thread_create(t, destroy);
+       _pthread_introspection_hook_callout_thread_create(t);
 }
 
 PTHREAD_NOINLINE
@@ -2449,8 +2520,9 @@ _pthread_introspection_hook_callout_thread_start(pthread_t t)
 {
        size_t freesize;
        void *freeaddr;
-       if (t == &_thread) {
-               freesize = t->stacksize + t->guardsize;
+       if (t == main_thread()) {
+               size_t stacksize = t->stackaddr - t->stackbottom;
+               freesize = stacksize + t->guardsize;
                freeaddr = t->stackaddr - freesize;
        } else {
                freesize = t->freesize - PTHREAD_SIZE;
@@ -2469,32 +2541,33 @@ _pthread_introspection_thread_start(pthread_t t)
 
 PTHREAD_NOINLINE
 static void
-_pthread_introspection_hook_callout_thread_terminate(pthread_t t,
-               void *freeaddr, size_t freesize, bool destroy)
+_pthread_introspection_hook_callout_thread_terminate(pthread_t t)
 {
 {
-       if (destroy && freesize) {
-               freesize -= PTHREAD_SIZE;
+       size_t freesize;
+       void *freeaddr;
+       if (t == main_thread()) {
+               size_t stacksize = t->stackaddr - t->stackbottom;
+               freesize = stacksize + t->guardsize;
+               freeaddr = t->stackaddr - freesize;
+       } else {
+               freesize = t->freesize - PTHREAD_SIZE;
+               freeaddr = t->freeaddr;
        }
        _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_TERMINATE, t,
                        freeaddr, freesize);
-       if (!destroy) return;
-       _pthread_introspection_thread_destroy(t);
 }
 
 static inline void
-_pthread_introspection_thread_terminate(pthread_t t, void *freeaddr,
-               size_t freesize, bool destroy)
+_pthread_introspection_thread_terminate(pthread_t t)
 {
        if (os_fastpath(!_pthread_introspection_hook)) return;
-       _pthread_introspection_hook_callout_thread_terminate(t, freeaddr, freesize,
-                       destroy);
+       _pthread_introspection_hook_callout_thread_terminate(t);
 }
 
 PTHREAD_NOINLINE
 static void
 _pthread_introspection_hook_callout_thread_destroy(pthread_t t)
 {
-       if (t == &_thread) return;
        _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_DESTROY, t, t,
                        PTHREAD_SIZE);
 }
@@ -2506,3 +2579,37 @@ _pthread_introspection_thread_destroy(pthread_t t)
        _pthread_introspection_hook_callout_thread_destroy(t);
 }
 
+#pragma mark libplatform shims
+
+#include <platform/string.h>
+
+// pthread_setup initializes large structures to 0,
+// which the compiler turns into a library call to memset.
+//
+// To avoid linking against Libc, provide a simple wrapper
+// that calls through to the libplatform primitives
+
+#undef memset
+PTHREAD_NOEXPORT
+void *
+memset(void *b, int c, size_t len)
+{
+       return _platform_memset(b, c, len);
+}
+
+#undef bzero
+PTHREAD_NOEXPORT
+void
+bzero(void *s, size_t n)
+{
+       _platform_bzero(s, n);
+}
+
+#undef memcpy
+PTHREAD_NOEXPORT
+void *
+memcpy(void* a, const void* b, unsigned long s)
+{
+       return _platform_memmove(a, b, s);
+}
+
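With the destroy event decoupled from the create/terminate callouts above (and no longer suppressed for the main thread), an introspection client now sees a clean CREATE/START/TERMINATE/DESTROY sequence for every thread. A minimal sketch against the pthread/introspection.h SPI; the logging is illustrative:

#include <pthread/introspection.h>
#include <stdio.h>

static pthread_introspection_hook_t prev_hook;

static void
log_hook(unsigned int event, pthread_t thread, void *addr, size_t size)
{
	if (event == PTHREAD_INTROSPECTION_THREAD_CREATE) {
		fprintf(stderr, "create  %p\n", (void *)thread);
	} else if (event == PTHREAD_INTROSPECTION_THREAD_DESTROY) {
		fprintf(stderr, "destroy %p\n", (void *)thread);
	}
	if (prev_hook) {
		prev_hook(event, thread, addr, size);	// keep any prior hook chained
	}
}

// early in process setup:
// prev_hook = pthread_introspection_hook_install(log_hook);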
index 8fc11c7187690b6f937f8edf8f1214e8f0e87478..90afe461bfec6a8168ae7a1c616940160d91f9d8 100644 (file)
@@ -21,6 +21,8 @@
  * @APPLE_LICENSE_HEADER_END@
  */
 
+#include "offsets.h"
+
 #if defined(__x86_64__)
 
 #include <mach/i386/syscall_sw.h>
@@ -49,6 +51,51 @@ _thread_start:
        leave
        ret
 
+       .align 2, 0x90
+       .globl _thread_chkstk_darwin
+_thread_chkstk_darwin:
+       .globl ____chkstk_darwin
+____chkstk_darwin: // %rax == alloca size
+       pushq  %rcx
+       leaq   0x10(%rsp), %rcx
+
+       // validate that the frame pointer is on our stack (no alt stack)
+       cmpq   %rcx, %gs:_PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET
+       jb     Lprobe
+       cmpq   %rcx, %gs:_PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET
+       jae    Lprobe
+
+       // validate alloca size
+       subq   %rax, %rcx
+       jb     Lcrash
+       cmpq   %rcx, %gs:_PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET
+       ja     Lcrash
+
+       popq   %rcx
+       retq
+
+Lprobe:
+       // probe the stack when it's not ours (altstack or some shenanigan)
+       cmpq   $0x1000, %rax
+       jb     Lend
+       pushq  %rax
+Lloop:
+       subq   $0x1000, %rcx
+       testq  %rcx, (%rcx)
+       subq   $0x1000, %rax
+       cmpq   $0x1000, %rax
+       ja     Lloop
+       popq   %rax
+Lend:
+       subq   %rax, %rcx
+       testq  %rcx, (%rcx)
+
+       popq   %rcx
+       retq
+
+Lcrash:
+       ud2
+
 #endif
 
 #elif defined(__i386__)
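The probe path in the x86-64 routine above is the interesting case: when the caller's stack pointer is not within the current pthread's recorded stack bounds, the code touches one word per page while walking down, so any guard page faults at a predictable address instead of being silently skipped by a large alloca. A rough C rendering of the probing idea (exposition only; it assumes 4 KiB pages and differs from the assembly in detail). The same logic is repeated for i386 in the next hunk.

static void
chkstk_probe(char *sp, unsigned long alloca_size)
{
	while (alloca_size >= 0x1000) {
		sp -= 0x1000;
		(void)*(volatile char *)sp;	// touch one byte per page
		alloca_size -= 0x1000;
	}
	sp -= alloca_size;
	(void)*(volatile char *)sp;		// touch the final address
}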
@@ -91,6 +138,56 @@ _thread_start:
        leave
        ret
 
+       .align 2, 0x90
+       .globl _thread_chkstk_darwin
+_thread_chkstk_darwin:
+       .globl ____chkstk_darwin
+____chkstk_darwin: // %eax == alloca size
+       pushl  %ecx
+       pushl  %edx
+       leal   0xc(%esp), %ecx
+
+       // validate that the frame pointer is on our stack (no alt stack)
+       movl   %gs:0x0, %edx    // pthread_self()
+       cmpl   %ecx, _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET(%edx)
+       jb     Lprobe
+       movl   _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET(%edx), %edx
+       cmpl   %ecx, %edx
+       jae    Lprobe
+
+       // validate alloca size
+       subl   %eax, %ecx
+       jb     Lcrash
+       cmpl   %ecx, %edx
+       ja     Lcrash
+
+       popl   %edx
+       popl   %ecx
+       retl
+
+Lprobe:
+       // probe the stack when it's not ours (altstack or some shenanigan)
+       cmpl   $0x1000, %eax
+       jb     Lend
+       pushl  %eax
+Lloop:
+       subl   $0x1000, %ecx
+       testl  %ecx, (%ecx)
+       subl   $0x1000, %eax
+       cmpl   $0x1000, %eax
+       ja     Lloop
+       popl   %eax
+Lend:
+       subl   %eax, %ecx
+       testl  %ecx, (%ecx)
+
+       popl   %edx
+       popl   %ecx
+       retl
+
+Lcrash:
+       ud2
+
 #endif
 
 #elif defined(__arm__)
index 894178c810e0ef8ce6336efc16165fe3d94cf5bd..8bb9c08a7c3e1aa1a418c1c649a85cb046a02e96 100644 (file)
 #include <sys/resource.h>
 #include <sys/sysctl.h>
 #include <sys/queue.h>
+#include <sys/ulock.h>
 #include <machine/vmparam.h>
 #include <mach/vm_statistics.h>
 
-extern int __unix_conforming;
 extern int _pthread_cond_wait(pthread_cond_t *cond,
                        pthread_mutex_t *mutex,
                        const struct timespec *abstime,
@@ -73,16 +73,27 @@ extern int __sigwait(const sigset_t *set, int *sig);
 extern int __pthread_sigmask(int, const sigset_t *, sigset_t *);
 extern int __pthread_markcancel(mach_port_t);
 extern int __pthread_canceled(int);
+extern int __semwait_signal_nocancel(int, int, int, int, __int64_t, __int32_t);
 
-#ifdef VARIANT_CANCELABLE
-extern int __semwait_signal(int cond_sem, int mutex_sem, int timeout, int relative, __int64_t tv_sec, __int32_t tv_nsec);
-#else
-extern int __semwait_signal(int cond_sem, int mutex_sem, int timeout, int relative, __int64_t tv_sec, __int32_t tv_nsec)  __asm__("___semwait_signal_nocancel");
-#endif
 
 PTHREAD_NOEXPORT
-int _pthread_join(pthread_t thread, void **value_ptr, int conforming,
-               int (*_semwait_signal)(int, int, int, int, __int64_t, __int32_t));
+int _pthread_join(pthread_t thread, void **value_ptr, int conforming);
+
+static inline int
+_pthread_conformance(void)
+{
+#if __DARWIN_UNIX03
+       if (__unix_conforming == 0)
+               __unix_conforming = 1;
+#ifdef VARIANT_CANCELABLE
+       return PTHREAD_CONFORM_UNIX03_CANCELABLE;
+#else /* !VARIANT_CANCELABLE */
+       return PTHREAD_CONFORM_UNIX03_NOCANCEL;
+#endif
+#else /* __DARWIN_UNIX03 */
+       return PTHREAD_CONFORM_DARWIN_LEGACY;
+#endif /* __DARWIN_UNIX03 */
+}
 
 #ifndef VARIANT_CANCELABLE
 
@@ -111,7 +122,7 @@ pthread_cancel(pthread_t thread)
                __unix_conforming = 1;
 #endif /* __DARWIN_UNIX03 */
 
-       if (!_pthread_is_valid(thread, 0, NULL)) {
+       if (!_pthread_is_valid(thread, NULL)) {
                return(ESRCH);
        }
 
@@ -135,15 +146,7 @@ pthread_cancel(pthread_t thread)
 void
 pthread_testcancel(void)
 {
-       pthread_t self = pthread_self();
-
-#if __DARWIN_UNIX03
-       if (__unix_conforming == 0)
-               __unix_conforming = 1;
-       _pthread_testcancel(self, 1);
-#else /* __DARWIN_UNIX03 */
-       _pthread_testcancel(self, 0);
-#endif /* __DARWIN_UNIX03 */
+       _pthread_testcancel(_pthread_conformance());
 }
 
 #ifndef BUILDING_VARIANT /* [ */
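pthread_testcancel() above now derives the conformance level itself instead of taking a thread argument. Its public behavior is unchanged: a worker using deferred cancellation polls at safe points, as in this sketch (do_unit_of_work is a hypothetical helper):

static void *
worker(void *arg)
{
	for (;;) {
		do_unit_of_work(arg);	// hypothetical unit of work
		pthread_testcancel();	// a pending pthread_cancel() unwinds here
	}
	return NULL;
}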
@@ -154,23 +157,32 @@ _pthread_exit_if_canceled(int error)
 {
        if (((error & 0xff) == EINTR) && __unix_conforming && (__pthread_canceled(0) == 0)) {
                pthread_t self = pthread_self();
-               if (self != NULL) {
-                       self->cancel_error = error;
-               }
+
+               self->cancel_error = error;
+               self->canceled = true;
                pthread_exit(PTHREAD_CANCELED);
        }
 }
 
 
-PTHREAD_NOEXPORT_VARIANT
-void
-_pthread_testcancel(pthread_t thread, int isconforming)
+static inline bool
+_pthread_is_canceled(pthread_t thread)
 {
        const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING);
 {
        const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING);
-
        int state = os_atomic_load2o(thread, cancel_state, seq_cst);
        int state = os_atomic_load2o(thread, cancel_state, seq_cst);
-       if ((state & flags) == flags) {
-               pthread_exit(isconforming ? PTHREAD_CANCELED : 0);
+       return (state & flags) == flags;
+}
+
+PTHREAD_NOEXPORT_VARIANT
+void
+_pthread_testcancel(int isconforming)
+{
+       pthread_t self = pthread_self();
+       if (_pthread_is_canceled(self)) {
+               // 4597450: begin
+               self->canceled = (isconforming != PTHREAD_CONFORM_DARWIN_LEGACY);
+               // 4597450: end
+               pthread_exit(isconforming ? PTHREAD_CANCELED : NULL);
        }
 }
 
@@ -179,7 +191,6 @@ void
 _pthread_markcancel_if_canceled(pthread_t thread, mach_port_t kport)
 {
        const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING);
-
        int state = os_atomic_or2o(thread, cancel_state,
                        _PTHREAD_CANCEL_INITIALIZED, relaxed);
        if ((state & flags) == flags && __unix_conforming) {
@@ -187,35 +198,14 @@ _pthread_markcancel_if_canceled(pthread_t thread, mach_port_t kport)
        }
 }
 
-PTHREAD_NOEXPORT
-void *
-_pthread_get_exit_value(pthread_t thread, int conforming)
-{
-       const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING);
-       void *value = thread->exit_value;
-
-       if (conforming) {
-               int state = os_atomic_load2o(thread, cancel_state, seq_cst);
-               if ((state & flags) == flags) {
-                       value = PTHREAD_CANCELED;
-               }
-       }
-       return value;
-}
-
 /* When a thread exits set the cancellation state to DISABLE and DEFERRED */
 PTHREAD_NOEXPORT
 void
-_pthread_setcancelstate_exit(pthread_t thread, void *value_ptr, int conforming)
+_pthread_setcancelstate_exit(pthread_t thread, void *value_ptr)
 {
        _pthread_update_cancel_state(thread,
                        _PTHREAD_CANCEL_STATE_MASK | _PTHREAD_CANCEL_TYPE_MASK,
                        PTHREAD_CANCEL_DISABLE | PTHREAD_CANCEL_DEFERRED);
-       if (value_ptr == PTHREAD_CANCELED) {
-               _PTHREAD_LOCK(thread->lock);
-               thread->detached |= _PTHREAD_WASCANCEL; // 4597450
-               _PTHREAD_UNLOCK(thread->lock);
-       }
 }
 
 #endif /* !BUILDING_VARIANT ] */
@@ -227,30 +217,30 @@ PTHREAD_ALWAYS_INLINE
 static inline int
 _pthread_setcancelstate_internal(int state, int *oldstateptr, int conforming)
 {
-       pthread_t self;
+       pthread_t self = pthread_self();
 
        switch (state) {
-               case PTHREAD_CANCEL_ENABLE:
-                       if (conforming) {
-                               __pthread_canceled(1);
-                       }
-                       break;
-               case PTHREAD_CANCEL_DISABLE:
-                       if (conforming) {
-                               __pthread_canceled(2);
-                       }
-                       break;
-               default:
-                       return EINVAL;
+       case PTHREAD_CANCEL_ENABLE:
+               if (conforming) {
+                       __pthread_canceled(1);
+               }
+               break;
+       case PTHREAD_CANCEL_DISABLE:
+               if (conforming) {
+                       __pthread_canceled(2);
+               }
+               break;
+       default:
+               return EINVAL;
        }
 
-       self = pthread_self();
        int oldstate = _pthread_update_cancel_state(self, _PTHREAD_CANCEL_STATE_MASK, state);
        if (oldstateptr) {
                *oldstateptr = oldstate & _PTHREAD_CANCEL_STATE_MASK;
        }
        if (!conforming) {
-               _pthread_testcancel(self, 0);  /* See if we need to 'die' now... */
+               /* See if we need to 'die' now... */
+               _pthread_testcancel(PTHREAD_CONFORM_DARWIN_LEGACY);
        }
        return 0;
 }
@@ -292,7 +282,8 @@ pthread_setcanceltype(int type, int *oldtype)
                *oldtype = oldstate & _PTHREAD_CANCEL_TYPE_MASK;
        }
 #if !__DARWIN_UNIX03
-       _pthread_testcancel(self, 0);  /* See if we need to 'die' now... */
+       /* See if we need to 'die' now... */
+       _pthread_testcancel(PTHREAD_CONFORM_DARWIN_LEGACY);
 #endif /* __DARWIN_UNIX03 */
        return (0);
 }
@@ -315,76 +306,196 @@ pthread_sigmask(int how, const sigset_t * set, sigset_t * oset)
 
 #ifndef BUILDING_VARIANT /* [ */
 
-static void
-__posix_join_cleanup(void *arg)
+typedef struct pthread_join_context_s {
+       pthread_t   waiter;
+       void      **value_ptr;
+       mach_port_t kport;
+       semaphore_t custom_stack_sema;
+       bool        detached;
+} pthread_join_context_s, *pthread_join_context_t;
+
+static inline void *
+_pthread_get_exit_value(pthread_t thread)
 {
-       pthread_t thread = (pthread_t)arg;
+       if (__unix_conforming && _pthread_is_canceled(thread)) {
+               return PTHREAD_CANCELED;
+       }
+       return thread->tl_exit_value;
+}
 
-       _PTHREAD_LOCK(thread->lock);
-       /* leave another thread to join */
-       thread->joiner = (struct _pthread *)NULL;
-       _PTHREAD_UNLOCK(thread->lock);
+// called with _pthread_list_lock held
+PTHREAD_NOEXPORT
+semaphore_t
+_pthread_joiner_prepost_wake(pthread_t thread)
+{
+       pthread_join_context_t ctx = thread->tl_join_ctx;
+       semaphore_t sema = MACH_PORT_NULL;
+
+       if (thread->tl_joinable) {
+               sema = ctx->custom_stack_sema;
+               thread->tl_joinable = false;
+       } else {
+               ctx->detached = true;
+               thread->tl_join_ctx = NULL;
+       }
+       if (ctx->value_ptr) *ctx->value_ptr = _pthread_get_exit_value(thread);
+       return sema;
+}
+
+static inline bool
+_pthread_joiner_abort_wait(pthread_t thread, pthread_join_context_t ctx)
+{
+       bool aborted = false;
+
+       _PTHREAD_LOCK(_pthread_list_lock);
+       if (!ctx->detached && thread->tl_exit_gate != MACH_PORT_DEAD) {
+               /*
+                * _pthread_joiner_prepost_wake() didn't happen
+                * allow another thread to join
+                */
+#if DEBUG
+               PTHREAD_ASSERT(thread->tl_join_ctx == ctx);
+#endif
+               thread->tl_join_ctx = NULL;
+               thread->tl_exit_gate = MACH_PORT_NULL;
+               aborted = true;
+       }
+       _PTHREAD_UNLOCK(_pthread_list_lock);
+       return aborted;
+}
+
+static int
+_pthread_joiner_wait(pthread_t thread, pthread_join_context_t ctx, int conforming)
+{
+       uint32_t *exit_gate = &thread->tl_exit_gate;
+       int ulock_op = UL_UNFAIR_LOCK | ULF_NO_ERRNO;
+
+       if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE) {
+               ulock_op |= ULF_WAIT_CANCEL_POINT;
+       }
+
+       for (;;) {
+               uint32_t cur = os_atomic_load(exit_gate, acquire);
+               if (cur == MACH_PORT_DEAD) {
+                       break;
+               }
+               if (os_unlikely(cur != ctx->kport)) {
+                       PTHREAD_CLIENT_CRASH(cur, "pthread_join() state corruption");
+               }
+               int ret = __ulock_wait(ulock_op, exit_gate, ctx->kport, 0);
+               switch (-ret) {
+               case 0:
+               case EFAULT:
+                       break;
+               case EINTR:
+                       /*
+                        * POSIX says:
+                        *
+                        *   As specified, either the pthread_join() call is canceled, or it
+                        *   succeeds, but not both. The difference is obvious to the
+                        *   application, since either a cancellation handler is run or
+                        *   pthread_join() returns.
+                        *
+                        * When __ulock_wait() returns EINTR, we check if we have been
+                        * canceled, and if we have, we try to abort the wait.
+                        *
+                        * If we can't, it means the other thread finished the join while we
+                        * were being canceled and committed the waiter to return from
+                        * pthread_join(). Returning from the join then takes precedence
+                        * over the cancelation which will be acted upon at the next
+                        * cancelation point.
+                        */
+                       if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE &&
+                                       _pthread_is_canceled(ctx->waiter)) {
+                               if (_pthread_joiner_abort_wait(thread, ctx)) {
+                                       ctx->waiter->canceled = true;
+                                       pthread_exit(PTHREAD_CANCELED);
+                               }
+                       }
+                       break;
+               }
+       }
+
+       bool cleanup = false;
+
+       _PTHREAD_LOCK(_pthread_list_lock);
+       // If pthread_detach() was called, we can't safely dereference the thread,
+       // else, decide who gets to deallocate the thread (see _pthread_terminate).
+       if (!ctx->detached) {
+#if DEBUG
+               PTHREAD_ASSERT(thread->tl_join_ctx == ctx);
+#endif
+               thread->tl_join_ctx = NULL;
+               cleanup = thread->tl_joiner_cleans_up;
+       }
+       _PTHREAD_UNLOCK(_pthread_list_lock);
+
+       if (cleanup) {
+               _pthread_deallocate(thread, false);
+       }
+       return 0;
 }
 
 PTHREAD_NOEXPORT PTHREAD_NOINLINE
 int
-_pthread_join(pthread_t thread, void **value_ptr, int conforming,
-               int (*_semwait_signal)(int, int, int, int, __int64_t, __int32_t))
+_pthread_join(pthread_t thread, void **value_ptr, int conforming)
 {
-       int res = 0;
        pthread_t self = pthread_self();
-       kern_return_t kern_res;
-       semaphore_t joinsem, death = (semaphore_t)os_get_cached_semaphore();
+       pthread_join_context_s ctx = {
+               .waiter = self,
+               .value_ptr = value_ptr,
+               .kport = MACH_PORT_NULL,
+               .custom_stack_sema = MACH_PORT_NULL,
+       };
+       int res = 0;
+       kern_return_t kr;
 
 
-               res = ESRCH;
-               goto out;
+       if (!_pthread_validate_thread_and_list_lock(thread)) {
+               return ESRCH;
        }
 
-       if (thread->sig != _PTHREAD_SIG) {
-               res = ESRCH;
-       } else if ((thread->detached & PTHREAD_CREATE_DETACHED) ||
-                       !(thread->detached & PTHREAD_CREATE_JOINABLE) ||
-                       (thread->joiner != NULL)) {
+       if (!thread->tl_joinable || (thread->tl_join_ctx != NULL)) {
                res = EINVAL;
-       } else if (thread == self || (self != NULL && self->joiner == thread)) {
+       } else if (thread == self ||
+                       (self->tl_join_ctx && self->tl_join_ctx->waiter == thread)) {
                res = EDEADLK;
+       } else if (thread->tl_exit_gate == MACH_PORT_DEAD) {
+               TAILQ_REMOVE(&__pthread_head, thread, tl_plist);
+#if DEBUG
+               PTHREAD_ASSERT(thread->tl_joiner_cleans_up);
+#endif
+               thread->tl_joinable = false;
+               if (value_ptr) *value_ptr = _pthread_get_exit_value(thread);
+       } else {
+               ctx.kport = _pthread_kernel_thread(thread);
+               thread->tl_exit_gate = ctx.kport;
+               thread->tl_join_ctx = &ctx;
+               if (thread->tl_has_custom_stack) {
+                       ctx.custom_stack_sema = (semaphore_t)os_get_cached_semaphore();
+               }
        }
-       if (res != 0) {
-               _PTHREAD_UNLOCK(thread->lock);
-               goto out;
-       }
+       _PTHREAD_UNLOCK(_pthread_list_lock);
 
-       joinsem = thread->joiner_notify;
-       if (joinsem == SEMAPHORE_NULL) {
-               thread->joiner_notify = joinsem = death;
-               death = MACH_PORT_NULL;
+       if (res == 0) {
+               if (ctx.kport == MACH_PORT_NULL) {
+                       _pthread_deallocate(thread, false);
+               } else {
+                       res = _pthread_joiner_wait(thread, &ctx, conforming);
+               }
        }
-       thread->joiner = self;
-       _PTHREAD_UNLOCK(thread->lock);
-
-       if (conforming) {
-               /* Wait for it to signal... */
-               pthread_cleanup_push(__posix_join_cleanup, (void *)thread);
-               do {
-                       res = _semwait_signal(joinsem, 0, 0, 0, 0, 0);
-               } while ((res < 0) && (errno == EINTR));
-               pthread_cleanup_pop(0);
-       } else {
-               /* Wait for it to signal... */
-               kern_return_t (*_semaphore_wait)(semaphore_t) =
-                               (void*)_semwait_signal;
+       if (res == 0 && ctx.custom_stack_sema && !ctx.detached) {
+               // threads with a custom stack need to make sure _pthread_terminate
+               // returned before the joiner is unblocked, the joiner may quickly
+               // deallocate the stack with rather dire consequences.
+               //
+               // When we reach this point we know the pthread_join has to succeed
+               // so this can't be a cancelation point.
                do {
                do {
-                       kern_res = _semaphore_wait(joinsem);
-               } while (kern_res != KERN_SUCCESS);
+                       kr = __semwait_signal_nocancel(ctx.custom_stack_sema, 0, 0, 0, 0, 0);
+               } while (kr != KERN_SUCCESS);
        }
-
-       os_put_cached_semaphore((os_semaphore_t)joinsem);
-       res = _pthread_join_cleanup(thread, value_ptr, conforming);
-
-out:
-       if (death) {
-               os_put_cached_semaphore(death);
+       if (ctx.custom_stack_sema) {
+               os_put_cached_semaphore(ctx.custom_stack_sema);
        }
        return res;
 }
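The validation order in _pthread_join() above preserves the documented error contract. A standalone illustration, not part of this change (the detached case may report ESRCH instead of EINVAL once the thread has been reclaimed):

#include <pthread.h>
#include <assert.h>
#include <errno.h>

static void *noop(void *arg) { return arg; }

static void
join_error_cases(void)
{
	pthread_t t;
	void *val;

	// self-join deadlock is detected up front
	assert(pthread_join(pthread_self(), &val) == EDEADLK);

	pthread_create(&t, NULL, noop, NULL);
	pthread_detach(t);
	int rc = pthread_join(t, &val);		// no longer joinable
	assert(rc == EINVAL || rc == ESRCH);
}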
@@ -398,82 +509,45 @@ out:
 int
 pthread_join(pthread_t thread, void **value_ptr)
 {
-#if __DARWIN_UNIX03
-       if (__unix_conforming == 0)
-               __unix_conforming = 1;
-
-#ifdef VARIANT_CANCELABLE
-       _pthread_testcancel(pthread_self(), 1);
-#endif /* VARIANT_CANCELABLE */
-       return _pthread_join(thread, value_ptr, 1, __semwait_signal);
-#else
-       return _pthread_join(thread, value_ptr, 0, (void*)semaphore_wait);
-#endif /* __DARWIN_UNIX03 */
-
+       int conforming = _pthread_conformance();
+       if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE) {
+               _pthread_testcancel(conforming);
+       }
+       return _pthread_join(thread, value_ptr, conforming);
 }
 
 int
-pthread_cond_wait(pthread_cond_t *cond,
-                 pthread_mutex_t *mutex)
+pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
 {
 {
-       int conforming;
-#if __DARWIN_UNIX03
-
-       if (__unix_conforming == 0)
-               __unix_conforming = 1;
-
-#ifdef VARIANT_CANCELABLE
-       conforming = 1;
-#else /* !VARIANT_CANCELABLE */
-       conforming = -1;
-#endif /* VARIANT_CANCELABLE */
-#else /* __DARWIN_UNIX03 */
-       conforming = 0;
-#endif /* __DARWIN_UNIX03 */
-       return (_pthread_cond_wait(cond, mutex, (struct timespec *)NULL, 0, conforming));
+       return _pthread_cond_wait(cond, mutex, NULL, 0, _pthread_conformance());
 }
 
 int
-pthread_cond_timedwait(pthread_cond_t *cond,
-                      pthread_mutex_t *mutex,
-                      const struct timespec *abstime)
+pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
+               const struct timespec *abstime)
 {
-       int conforming;
-#if __DARWIN_UNIX03
-       if (__unix_conforming == 0)
-               __unix_conforming = 1;
-
-#ifdef VARIANT_CANCELABLE
-       conforming = 1;
-#else /* !VARIANT_CANCELABLE */
-       conforming = -1;
-#endif /* VARIANT_CANCELABLE */
-#else /* __DARWIN_UNIX03 */
-        conforming = 0;
-#endif /* __DARWIN_UNIX03 */
-
-       return (_pthread_cond_wait(cond, mutex, abstime, 0, conforming));
+       return _pthread_cond_wait(cond, mutex, abstime, 0, _pthread_conformance());
 }
 
 int
 sigwait(const sigset_t * set, int * sig)
 {
 #if __DARWIN_UNIX03
-       int err = 0;
+       int err = 0, conformance = _pthread_conformance();
 
        if (__unix_conforming == 0)
                __unix_conforming = 1;
 
-#ifdef VARIANT_CANCELABLE
-       _pthread_testcancel(pthread_self(), 1);
-#endif /* VARIANT_CANCELABLE */
+       if (conformance == PTHREAD_CONFORM_UNIX03_CANCELABLE) {
+               _pthread_testcancel(conformance);
+       }
 
        if (__sigwait(set, sig) == -1) {
                err = errno;
 
-#ifdef VARIANT_CANCELABLE
-               _pthread_testcancel(pthread_self(), 1);
-#endif /* VARIANT_CANCELABLE */
+               if (conformance == PTHREAD_CONFORM_UNIX03_CANCELABLE) {
+                       _pthread_testcancel(conformance);
+               }
 
                /*
                 * EINTR that isn't a result of pthread_cancel()
index be55e1d164942c7ffad560259747fb93e206c999..79e38baa07b6cdc481c3bc6762466fa61a58a603 100644 (file)
@@ -59,7 +59,6 @@
 #endif /* PLOCKSTAT */
 
 extern int __gettimeofday(struct timeval *, struct timezone *);
-extern void _pthread_testcancel(pthread_t thread, int isconforming);
 
 PTHREAD_NOEXPORT
 int _pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex,
@@ -88,8 +87,8 @@ COND_GETSEQ_ADDR(_pthread_cond *cond,
 #ifndef BUILDING_VARIANT /* [ */
 
 static void _pthread_cond_cleanup(void *arg);
-static void _pthread_cond_updateval(_pthread_cond * cond, int error,
-               uint32_t updateval);
+static void _pthread_cond_updateval(_pthread_cond *cond, _pthread_mutex *mutex,
+               int error, uint32_t updateval);
 
 
 int
@@ -401,7 +400,7 @@ _pthread_cond_signal(pthread_cond_t *ocond, bool broadcast, mach_port_t thread)
        }
 
        if (updateval != (uint32_t)-1 && updateval != 0) {
-               _pthread_cond_updateval(cond, 0, updateval);
+               _pthread_cond_updateval(cond, NULL, 0, updateval);
        }
 
        return 0;
@@ -449,8 +448,8 @@ pthread_cond_signal(pthread_cond_t *ocond)
  * Suspend waiting for a condition variable.
  * Note: we have to keep a list of condition variables which are using
  * this same mutex variable so we can detect invalid 'destroy' sequences.
- * If isconforming < 0, we skip the _pthread_testcancel(), but keep the
- * remaining conforming behavior..
+ * If conformance is not cancelable, we skip the _pthread_testcancel(),
+ * but keep the remaining conforming behavior.
  */
 PTHREAD_NOEXPORT PTHREAD_NOINLINE
 int
@@ -458,7 +457,7 @@ _pthread_cond_wait(pthread_cond_t *ocond,
                        pthread_mutex_t *omutex,
                        const struct timespec *abstime,
                        int isRelative,
-                       int isconforming)
+                       int conforming)
 {
        int res;
        _pthread_cond *cond = (_pthread_cond *)ocond;
@@ -477,13 +476,13 @@ _pthread_cond_wait(pthread_cond_t *ocond,
                return res;
        }
 
                return res;
        }
 
-       if (isconforming) {
+       if (conforming) {
                if (!_pthread_mutex_check_signature(mutex) &&
                                !_pthread_mutex_check_signature_init(mutex)) {
                        return EINVAL;
                }
                if (!_pthread_mutex_check_signature(mutex) &&
                                !_pthread_mutex_check_signature_init(mutex)) {
                        return EINVAL;
                }
-               if (isconforming > 0) {
-                       _pthread_testcancel(pthread_self(), 1);
+               if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE) {
+                       _pthread_testcancel(conforming);
                }
        }
 
                }
        }
 
@@ -505,7 +504,7 @@ _pthread_cond_wait(pthread_cond_t *ocond,
                        if (then.tv_sec < 0 || (then.tv_sec == 0 && then.tv_nsec == 0)) {
                                return ETIMEDOUT;
                        }
                        if (then.tv_sec < 0 || (then.tv_sec == 0 && then.tv_nsec == 0)) {
                                return ETIMEDOUT;
                        }
-                       if (isconforming &&
+                       if (conforming &&
                            (abstime->tv_sec < 0 ||
                             abstime->tv_nsec < 0 ||
                             abstime->tv_nsec >= NSEC_PER_SEC)) {
                            (abstime->tv_sec < 0 ||
                             abstime->tv_nsec < 0 ||
                             abstime->tv_nsec >= NSEC_PER_SEC)) {
@@ -518,7 +517,7 @@ _pthread_cond_wait(pthread_cond_t *ocond,
                                return ETIMEDOUT;
                        }
                }
                                return ETIMEDOUT;
                        }
                }
-               if (isconforming && (then.tv_sec < 0 || then.tv_nsec < 0)) {
+               if (conforming && (then.tv_sec < 0 || then.tv_nsec < 0)) {
                        return EINVAL;
                }
                if (then.tv_nsec >= NSEC_PER_SEC) {
                        return EINVAL;
                }
                if (then.tv_nsec >= NSEC_PER_SEC) {
@@ -567,10 +566,10 @@ _pthread_cond_wait(pthread_cond_t *ocond,
        cvlsgen = ((uint64_t)(ulval | savebits)<< 32) | nlval;
 
        // SUSv3 requires pthread_cond_wait to be a cancellation point
        cvlsgen = ((uint64_t)(ulval | savebits)<< 32) | nlval;
 
        // SUSv3 requires pthread_cond_wait to be a cancellation point
-       if (isconforming) {
+       if (conforming) {
                pthread_cleanup_push(_pthread_cond_cleanup, (void *)cond);
                updateval = __psynch_cvwait(ocond, cvlsgen, ucntval, (pthread_mutex_t *)npmtx, mugen, flags, (int64_t)then.tv_sec, (int32_t)then.tv_nsec);
                pthread_cleanup_push(_pthread_cond_cleanup, (void *)cond);
                updateval = __psynch_cvwait(ocond, cvlsgen, ucntval, (pthread_mutex_t *)npmtx, mugen, flags, (int64_t)then.tv_sec, (int32_t)then.tv_nsec);
-               _pthread_testcancel(pthread_self(), isconforming);
+               _pthread_testcancel(conforming);
                pthread_cleanup_pop(0);
        } else {
                updateval = __psynch_cvwait(ocond, cvlsgen, ucntval, (pthread_mutex_t *)npmtx, mugen, flags, (int64_t)then.tv_sec, (int32_t)then.tv_nsec);
                pthread_cleanup_pop(0);
        } else {
                updateval = __psynch_cvwait(ocond, cvlsgen, ucntval, (pthread_mutex_t *)npmtx, mugen, flags, (int64_t)then.tv_sec, (int32_t)then.tv_nsec);
@@ -592,12 +591,12 @@ _pthread_cond_wait(pthread_cond_t *ocond,
                }
 
                // add unlock ref to show one less waiter
                }
 
                // add unlock ref to show one less waiter
-               _pthread_cond_updateval(cond, err, 0);
+               _pthread_cond_updateval(cond, mutex, err, 0);
        } else if (updateval != 0) {
                // Successful wait
                // The return due to prepost and might have bit states
                // update S and return for prepo if needed
        } else if (updateval != 0) {
                // Successful wait
                // The return due to prepost and might have bit states
                // update S and return for prepo if needed
-               _pthread_cond_updateval(cond, 0, updateval);
+               _pthread_cond_updateval(cond, mutex, 0, updateval);
        }
 
        pthread_mutex_lock(omutex);
        }
 
        pthread_mutex_lock(omutex);
@@ -609,25 +608,20 @@ static void
 _pthread_cond_cleanup(void *arg)
 {
        _pthread_cond *cond = (_pthread_cond *)arg;
 _pthread_cond_cleanup(void *arg)
 {
        _pthread_cond *cond = (_pthread_cond *)arg;
+       pthread_t thread = pthread_self();
        pthread_mutex_t *mutex;
 
 // 4597450: begin
        pthread_mutex_t *mutex;
 
 // 4597450: begin
-       pthread_t thread = pthread_self();
-       int thcanceled = 0;
-
-       _PTHREAD_LOCK(thread->lock);
-       thcanceled = (thread->detached & _PTHREAD_WASCANCEL);
-       _PTHREAD_UNLOCK(thread->lock);
-
-       if (thcanceled == 0) {
+       if (!thread->canceled) {
                return;
        }
                return;
        }
-
 // 4597450: end
 // 4597450: end
+
        mutex = (pthread_mutex_t *)cond->busy;
 
        // add unlock ref to show one less waiter
        mutex = (pthread_mutex_t *)cond->busy;
 
        // add unlock ref to show one less waiter
-       _pthread_cond_updateval(cond, thread->cancel_error, 0);
+       _pthread_cond_updateval(cond, (_pthread_mutex *)mutex,
+                       thread->cancel_error, 0);
 
        /*
        ** Can't do anything if this fails -- we're on the way out
 
        /*
        ** Can't do anything if this fails -- we're on the way out
@@ -637,11 +631,9 @@ _pthread_cond_cleanup(void *arg)
        }
 }
 
        }
 }
 
-#define ECVCERORR       256
-#define ECVPERORR       512
-
 static void
 static void
-_pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval)
+_pthread_cond_updateval(_pthread_cond *cond, _pthread_mutex *mutex,
+               int error, uint32_t updateval)
 {
        int needclearpre;
 
 {
        int needclearpre;
 
@@ -653,10 +645,10 @@ _pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval)
 
        if (error != 0) {
                updateval = PTHRW_INC;
 
        if (error != 0) {
                updateval = PTHRW_INC;
-               if ((error & ECVCERORR) != 0) {
+               if (error & ECVCLEARED) {
                        updateval |= PTH_RWS_CV_CBIT;
                }
                        updateval |= PTH_RWS_CV_CBIT;
                }
-               if ((error & ECVPERORR) != 0) {
+               if (error & ECVPREPOST) {
                        updateval |= PTH_RWS_CV_PBIT;
                }
        }
                        updateval |= PTH_RWS_CV_PBIT;
                }
        }
@@ -675,7 +667,10 @@ _pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval)
                oldval64 = (((uint64_t)scntval) << 32);
                oldval64 |= lcntval;
 
                oldval64 = (((uint64_t)scntval) << 32);
                oldval64 |= lcntval;
 
-               if (diffgen <= 0) {
+               PTHREAD_TRACE(psynch_cvar_updateval | DBG_FUNC_START, cond, oldval64,
+                               updateval, 0);
+
+               if (diffgen <= 0 && !is_rws_pbit_set(updateval)) {
                        /* TBD: Assert, should not be the case */
                        /* validate it is spurious and return */
                        newval64 = oldval64;
                        /* TBD: Assert, should not be the case */
                        /* validate it is spurious and return */
                        newval64 = oldval64;
@@ -700,19 +695,22 @@ _pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval)
                }
        } while (!os_atomic_cmpxchg(c_lsseqaddr, oldval64, newval64, seq_cst));
 
                }
        } while (!os_atomic_cmpxchg(c_lsseqaddr, oldval64, newval64, seq_cst));
 
+       PTHREAD_TRACE(psynch_cvar_updateval | DBG_FUNC_END, cond, newval64,
+                       (uint64_t)diffgen << 32 | needclearpre, 0);
+
        if (diffgen > 0) {
                // if L == S, then reset associated mutex
                if ((nsval & PTHRW_COUNT_MASK) == (lcntval & PTHRW_COUNT_MASK)) {
                        cond->busy = NULL;
                }
        if (diffgen > 0) {
                // if L == S, then reset associated mutex
                if ((nsval & PTHRW_COUNT_MASK) == (lcntval & PTHRW_COUNT_MASK)) {
                        cond->busy = NULL;
                }
+       }
 
 
-               if (needclearpre != 0) {
-                       uint32_t flags = 0;
-                       if (cond->pshared == PTHREAD_PROCESS_SHARED) {
-                               flags |= _PTHREAD_MTX_OPT_PSHARED;
-                       }
-                       (void)__psynch_cvclrprepost(cond, lcntval, ucntval, nsval, 0, lcntval, flags);
+       if (needclearpre) {
+               uint32_t flags = 0;
+               if (cond->pshared == PTHREAD_PROCESS_SHARED) {
+                       flags |= _PTHREAD_MTX_OPT_PSHARED;
                }
                }
+               (void)__psynch_cvclrprepost(cond, lcntval, ucntval, nsval, 0, lcntval, flags);
        }
 }
 
        }
 }
 
diff --git a/src/pthread_dependency.c b/src/pthread_dependency.c
new file mode 100644 (file)
index 0000000..282dfc3
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include "resolver.h"
+#include "internal.h"
+#include "dependency_private.h"
+#include <sys/ulock.h>
+
+#define PREREQUISITE_FULFILLED  (~0u)
+
+PTHREAD_NOEXPORT
+void _pthread_dependency_fulfill_slow(pthread_dependency_t *pr, uint32_t old);
+
+OS_ALWAYS_INLINE
+static inline mach_port_t
+_pthread_dependency_self(void)
+{
+       void *v = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_MACH_THREAD_SELF);
+       return (mach_port_t)(uintptr_t)v;
+}
+
+void
+pthread_dependency_init_np(pthread_dependency_t *pr, pthread_t pth,
+               pthread_dependency_attr_t *attrs)
+{
+       if (attrs) *(volatile char *)attrs;
+       *pr = (pthread_dependency_t)PTHREAD_DEPENDENCY_INITIALIZER_NP(pth);
+}
+
+OS_NOINLINE
+void
+_pthread_dependency_fulfill_slow(pthread_dependency_t *pr, uint32_t old)
+{
+       if (old == PREREQUISITE_FULFILLED) {
+               PTHREAD_CLIENT_CRASH(0, "Fufilling pthread_dependency_t twice");
+       }
+       if (os_unlikely(old != _pthread_dependency_self())) {
+               PTHREAD_CLIENT_CRASH(old, "Fulfilled a dependency "
+                               "not owned by current thread");
+       }
+
+       int ret = __ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &pr->__pdep_opaque1, 0);
+       switch (-ret) {
+       case 0:
+       case ENOENT:
+               return;
+       default:
+               PTHREAD_INTERNAL_CRASH(-ret, "__ulock_wake() failed");
+       }
+}
+
+
+void
+pthread_dependency_fulfill_np(pthread_dependency_t *pr, void *value)
+{
+       uint32_t old;
+
+       pr->__pdep_opaque2 = (uint64_t)(uintptr_t)value;
+       old = os_atomic_xchg(&pr->__pdep_opaque1, PREREQUISITE_FULFILLED, release);
+
+       if (old != 0) _pthread_dependency_fulfill_slow(pr, old);
+}
+
+void *
+pthread_dependency_wait_np(pthread_dependency_t *pr)
+{
+       if (os_atomic_cmpxchg(&pr->__pdep_opaque1, 0, pr->__pdep_owner, relaxed)) {
+               int ret;
+       again:
+               ret = __ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &pr->__pdep_opaque1,
+                               pr->__pdep_owner, 0);
+               switch (-ret) {
+               case EFAULT:
+                       if (pr->__pdep_opaque1 == pr->__pdep_owner) goto again;
+               case 0:
+                       break;
+               case EOWNERDEAD:
+                       PTHREAD_CLIENT_CRASH(pr->__pdep_owner, "Waiting on orphaned dependency");
+               default:
+                       PTHREAD_CLIENT_CRASH(-ret, "__ulock_wait() failed");
+               }
+       }
+
+       uint32_t cur = os_atomic_load(&pr->__pdep_opaque1, acquire);
+       if (cur == PREREQUISITE_FULFILLED) {
+               return (void *)(uintptr_t)pr->__pdep_opaque2;
+       }
+       PTHREAD_CLIENT_CRASH(cur, "Corrupted pthread_dependency_t");
+}
+
index a68503c09b2ae4414ea4eb7e65198f00f93c56dc..edc97ee38cbcdb10948eed3da81fd43d4f748084 100644 (file)
@@ -54,8 +54,6 @@
 #include "internal.h"
 #include "kern/kern_trace.h"
 
 #include "internal.h"
 #include "kern/kern_trace.h"
 
-extern int __unix_conforming;
-
 #ifndef BUILDING_VARIANT /* [ */
 
 #ifdef PLOCKSTAT
 #ifndef BUILDING_VARIANT /* [ */
 
 #ifdef PLOCKSTAT
@@ -85,31 +83,73 @@ _plockstat_never_fired(void)
 
 #define PTHREAD_MUTEX_INIT_UNUSED 1
 
 
 #define PTHREAD_MUTEX_INIT_UNUSED 1
 
+PTHREAD_NOEXPORT PTHREAD_WEAK
+int _pthread_mutex_lock_init_slow(_pthread_mutex *mutex, bool trylock);
+
+PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
+int _pthread_mutex_fairshare_lock_slow(_pthread_mutex *mutex, bool trylock);
+
 PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
 PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
-int _pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock);
+int _pthread_mutex_firstfit_lock_slow(_pthread_mutex *mutex, bool trylock);
 
 PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
 
 PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
-int _pthread_mutex_unlock_slow(pthread_mutex_t *omutex);
+int _pthread_mutex_fairshare_unlock_slow(_pthread_mutex *mutex);
+
+PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
+int _pthread_mutex_firstfit_unlock_slow(_pthread_mutex *mutex);
 
 PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
 int _pthread_mutex_corruption_abort(_pthread_mutex *mutex);
 
 
 PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
 int _pthread_mutex_corruption_abort(_pthread_mutex *mutex);
 
-extern int __pthread_mutex_default_policy PTHREAD_NOEXPORT;
+extern int __pthread_mutex_default_opt_policy PTHREAD_NOEXPORT;
+
+
+int __pthread_mutex_default_opt_policy PTHREAD_NOEXPORT =
+               _PTHREAD_MTX_OPT_POLICY_DEFAULT;
 
 
+static inline bool
+_pthread_mutex_policy_validate(int policy)
+{
+       return (policy >= 0 && policy < _PTHREAD_MUTEX_POLICY_LAST);
+}
 
 
-int __pthread_mutex_default_policy PTHREAD_NOEXPORT =
-               _PTHREAD_MUTEX_POLICY_FAIRSHARE;
+static inline int
+_pthread_mutex_policy_to_opt(int policy)
+{
+       switch (policy) {
+       case PTHREAD_MUTEX_POLICY_FAIRSHARE_NP:
+               return _PTHREAD_MTX_OPT_POLICY_FAIRSHARE;
+       case PTHREAD_MUTEX_POLICY_FIRSTFIT_NP:
+               return _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
+       default:
+               __builtin_unreachable();
+       }
+}
 
 PTHREAD_NOEXPORT
 void
 _pthread_mutex_global_init(const char *envp[],
                struct _pthread_registration_data *registration_data)
 {
 
 PTHREAD_NOEXPORT
 void
 _pthread_mutex_global_init(const char *envp[],
                struct _pthread_registration_data *registration_data)
 {
+
+       int opt = _PTHREAD_MTX_OPT_POLICY_DEFAULT;
+       if (registration_data->mutex_default_policy) {
+               int policy = registration_data->mutex_default_policy;
+               if (_pthread_mutex_policy_validate(policy)) {
+                       opt = _pthread_mutex_policy_to_opt(policy);
+               }
+       }
+
        const char *envvar = _simple_getenv(envp, "PTHREAD_MUTEX_DEFAULT_POLICY");
        const char *envvar = _simple_getenv(envp, "PTHREAD_MUTEX_DEFAULT_POLICY");
-       if ((envvar && (envvar[0] - '0') == _PTHREAD_MUTEX_POLICY_FIRSTFIT) ||
-                       (registration_data->mutex_default_policy ==
-                               _PTHREAD_MUTEX_POLICY_FIRSTFIT)) {
-               __pthread_mutex_default_policy = _PTHREAD_MUTEX_POLICY_FIRSTFIT;
+       if (envvar) {
+               int policy = envvar[0] - '0';
+               if (_pthread_mutex_policy_validate(policy)) {
+                       opt = _pthread_mutex_policy_to_opt(policy);
+               }
+       }
+
+       if (opt != __pthread_mutex_default_opt_policy) {
+               __pthread_mutex_default_opt_policy = opt;
        }
 }
 
        }
 }
 
@@ -162,7 +202,7 @@ mutex_seq_load(mutex_seq *seqaddr, mutex_seq *oldseqval)
 #define mutex_seq_atomic_load(seqaddr, oldseqval, m) \
                mutex_seq_atomic_load_##m(seqaddr, oldseqval)
 
 #define mutex_seq_atomic_load(seqaddr, oldseqval, m) \
                mutex_seq_atomic_load_##m(seqaddr, oldseqval)
 
-PTHREAD_ALWAYS_INLINE
+PTHREAD_ALWAYS_INLINE PTHREAD_USED
 static inline bool
 mutex_seq_atomic_cmpxchgv_relaxed(mutex_seq *seqaddr, mutex_seq *oldseqval,
                mutex_seq *newseqval)
 static inline bool
 mutex_seq_atomic_cmpxchgv_relaxed(mutex_seq *seqaddr, mutex_seq *oldseqval,
                mutex_seq *newseqval)
@@ -171,7 +211,7 @@ mutex_seq_atomic_cmpxchgv_relaxed(mutex_seq *seqaddr, mutex_seq *oldseqval,
                        newseqval->seq_LU, &oldseqval->seq_LU, relaxed);
 }
 
                        newseqval->seq_LU, &oldseqval->seq_LU, relaxed);
 }
 
-PTHREAD_ALWAYS_INLINE
+PTHREAD_ALWAYS_INLINE PTHREAD_USED
 static inline bool
 mutex_seq_atomic_cmpxchgv_acquire(mutex_seq *seqaddr, mutex_seq *oldseqval,
                mutex_seq *newseqval)
 static inline bool
 mutex_seq_atomic_cmpxchgv_acquire(mutex_seq *seqaddr, mutex_seq *oldseqval,
                mutex_seq *newseqval)
@@ -180,7 +220,7 @@ mutex_seq_atomic_cmpxchgv_acquire(mutex_seq *seqaddr, mutex_seq *oldseqval,
                        newseqval->seq_LU, &oldseqval->seq_LU, acquire);
 }
 
                        newseqval->seq_LU, &oldseqval->seq_LU, acquire);
 }
 
-PTHREAD_ALWAYS_INLINE
+PTHREAD_ALWAYS_INLINE PTHREAD_USED
 static inline bool
 mutex_seq_atomic_cmpxchgv_release(mutex_seq *seqaddr, mutex_seq *oldseqval,
                mutex_seq *newseqval)
 static inline bool
 mutex_seq_atomic_cmpxchgv_release(mutex_seq *seqaddr, mutex_seq *oldseqval,
                mutex_seq *newseqval)
@@ -274,8 +314,16 @@ pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t *attr, int *policy)
 {
        int res = EINVAL;
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
 {
        int res = EINVAL;
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
-               *policy = attr->policy;
-               res = 0;
+               switch (attr->opt) {
+               case _PTHREAD_MTX_OPT_POLICY_FAIRSHARE:
+                       *policy = PTHREAD_MUTEX_POLICY_FAIRSHARE_NP;
+                       res = 0;
+                       break;
+               case _PTHREAD_MTX_OPT_POLICY_FIRSTFIT:
+                       *policy = PTHREAD_MUTEX_POLICY_FIRSTFIT_NP;
+                       res = 0;
+                       break;
+               }
        }
        return res;
 }
        }
        return res;
 }
@@ -307,7 +355,7 @@ pthread_mutexattr_init(pthread_mutexattr_t *attr)
 {
        attr->prioceiling = _PTHREAD_DEFAULT_PRIOCEILING;
        attr->protocol = _PTHREAD_DEFAULT_PROTOCOL;
 {
        attr->prioceiling = _PTHREAD_DEFAULT_PRIOCEILING;
        attr->protocol = _PTHREAD_DEFAULT_PROTOCOL;
-       attr->policy = __pthread_mutex_default_policy;
+       attr->opt = __pthread_mutex_default_opt_policy;
        attr->type = PTHREAD_MUTEX_DEFAULT;
        attr->sig = _PTHREAD_MUTEX_ATTR_SIG;
        attr->pshared = _PTHREAD_DEFAULT_PSHARED;
        attr->type = PTHREAD_MUTEX_DEFAULT;
        attr->sig = _PTHREAD_MUTEX_ATTR_SIG;
        attr->pshared = _PTHREAD_DEFAULT_PSHARED;
@@ -349,12 +397,18 @@ pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *attr, int policy)
 {
        int res = EINVAL;
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
 {
        int res = EINVAL;
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
+               // <rdar://problem/35844519> the first-fit implementation was broken
+               // pre-Liberty so this mapping exists to ensure that the old first-fit
+               // define (2) is no longer valid when used on older systems.
                switch (policy) {
                switch (policy) {
-                       case _PTHREAD_MUTEX_POLICY_FAIRSHARE:
-                       case _PTHREAD_MUTEX_POLICY_FIRSTFIT:
-                               attr->policy = policy;
-                               res = 0;
-                               break;
+               case PTHREAD_MUTEX_POLICY_FAIRSHARE_NP:
+                       attr->opt = _PTHREAD_MTX_OPT_POLICY_FAIRSHARE;
+                       res = 0;
+                       break;
+               case PTHREAD_MUTEX_POLICY_FIRSTFIT_NP:
+                       attr->opt = _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
+                       res = 0;
+                       break;
                }
        }
        return res;
                }
        }
        return res;
@@ -412,6 +466,115 @@ _pthread_mutex_corruption_abort(_pthread_mutex *mutex)
 }
 
 
 }
 
 
+PTHREAD_NOINLINE
+static int
+_pthread_mutex_check_init_slow(_pthread_mutex *mutex)
+{
+       int res = EINVAL;
+
+       if (_pthread_mutex_check_signature_init(mutex)) {
+               _PTHREAD_LOCK(mutex->lock);
+               if (_pthread_mutex_check_signature_init(mutex)) {
+                       // initialize a statically initialized mutex to provide
+                       // compatibility for misbehaving applications.
+                       // (unlock should not be the first operation on a mutex)
+                       res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf));
+               } else if (_pthread_mutex_check_signature(mutex)) {
+                       res = 0;
+               }
+               _PTHREAD_UNLOCK(mutex->lock);
+       } else if (_pthread_mutex_check_signature(mutex)) {
+               res = 0;
+       }
+       if (res != 0) {
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res);
+       }
+       return res;
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_check_init(_pthread_mutex *mutex)
+{
+       int res = 0;
+       if (!_pthread_mutex_check_signature(mutex)) {
+               return _pthread_mutex_check_init_slow(mutex);
+       }
+       return res;
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+_pthread_mutex_is_fairshare(_pthread_mutex *mutex)
+{
+       return (mutex->mtxopts.options.policy == _PTHREAD_MTX_OPT_POLICY_FAIRSHARE);
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+_pthread_mutex_is_firstfit(_pthread_mutex *mutex)
+{
+       return (mutex->mtxopts.options.policy == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT);
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+_pthread_mutex_is_recursive(_pthread_mutex *mutex)
+{
+       return (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE);
+}
+
+PTHREAD_ALWAYS_INLINE
+static int
+_pthread_mutex_lock_handle_options(_pthread_mutex *mutex, bool trylock,
+               uint64_t *tidaddr)
+{
+       if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL) {
+               // NORMAL does not do EDEADLK checking
+               return 0;
+       }
+
+       uint64_t selfid = _pthread_selfid_direct();
+       if (os_atomic_load(tidaddr, relaxed) == selfid) {
+               if (_pthread_mutex_is_recursive(mutex)) {
+                       if (mutex->mtxopts.options.lock_count < USHRT_MAX) {
+                               mutex->mtxopts.options.lock_count += 1;
+                               return mutex->mtxopts.options.lock_count;
+                       } else {
+                               return -EAGAIN;
+                       }
+               } else if (trylock) { /* PTHREAD_MUTEX_ERRORCHECK */
+                       // <rdar://problem/16261552> as per OpenGroup, trylock cannot
+                       // return EDEADLK on a deadlock, it should return EBUSY.
+                       return -EBUSY;
+               } else { /* PTHREAD_MUTEX_ERRORCHECK */
+                       return -EDEADLK;
+               }
+       }
+
+       // Not recursive, or recursive but first lock.
+       return 0;
+}
+
+PTHREAD_ALWAYS_INLINE
+static int
+_pthread_mutex_unlock_handle_options(_pthread_mutex *mutex, uint64_t *tidaddr)
+{
+       if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL) {
+               // NORMAL does not do EDEADLK checking
+               return 0;
+       }
+
+       uint64_t selfid = _pthread_selfid_direct();
+       if (os_atomic_load(tidaddr, relaxed) != selfid) {
+               return -EPERM;
+       } else if (_pthread_mutex_is_recursive(mutex) &&
+                       --mutex->mtxopts.options.lock_count) {
+               return 1;
+       }
+       return 0;
+}
+
 /*
  * Sequence numbers and TID:
  *
 /*
  * Sequence numbers and TID:
  *
@@ -444,11 +607,9 @@ _pthread_mutex_corruption_abort(_pthread_mutex *mutex)
  */
 PTHREAD_ALWAYS_INLINE
 static inline int
  */
 PTHREAD_ALWAYS_INLINE
 static inline int
-_pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp,
-               uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
+_pthread_mutex_fairshare_unlock_updatebits(_pthread_mutex *mutex,
+               uint32_t *flagsp, uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
 {
 {
-       bool firstfit = (mutex->mtxopts.options.policy ==
-                       _PTHREAD_MUTEX_POLICY_FIRSTFIT);
        uint32_t flags = mutex->mtxopts.value;
        flags &= ~_PTHREAD_MTX_OPT_NOTIFY; // no notification by default
 
        uint32_t flags = mutex->mtxopts.value;
        flags &= ~_PTHREAD_MTX_OPT_NOTIFY; // no notification by default
 
@@ -462,27 +623,24 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp,
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t oldtid, newtid;
 
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t oldtid, newtid;
 
-       if (mutex->mtxopts.options.type != PTHREAD_MUTEX_NORMAL) {
-               uint64_t selfid = _pthread_selfid_direct();
-               if (os_atomic_load(tidaddr, relaxed) != selfid) {
-                       PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, EPERM);
-                       return EPERM;
-               } else if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE &&
-                          --mutex->mtxopts.options.lock_count) {
-                       PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1);
-                       if (flagsp != NULL) {
-                               *flagsp = flags;
-                       }
-                       return 0;
+       int res = _pthread_mutex_unlock_handle_options(mutex, tidaddr);
+       if (res > 0) {
+               // Valid recursive unlock
+               if (flagsp) {
+                       *flagsp = flags;
                }
                }
+               PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1);
+               return 0;
+       } else if (res < 0) {
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, -res);
+               return -res;
        }
 
        }
 
-       bool clearprepost, clearnotify, spurious;
+       bool clearnotify, spurious;
        do {
                newseq = oldseq;
                oldtid = os_atomic_load(tidaddr, relaxed);
 
        do {
                newseq = oldseq;
                oldtid = os_atomic_load(tidaddr, relaxed);
 
-               clearprepost = false;
                clearnotify = false;
                spurious = false;
 
                clearnotify = false;
                spurious = false;
 
@@ -504,13 +662,7 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp,
                                clearnotify = true;
                                newtid = 0; // clear owner
                        } else {
                                clearnotify = true;
                                newtid = 0; // clear owner
                        } else {
-                               if (firstfit) {
-                                       // reset E bit so another can acquire meanwhile
-                                       newseq.lgenval &= ~PTH_RWL_EBIT;
-                                       newtid = 0;
-                               } else {
-                                       newtid = PTHREAD_MTX_TID_SWITCHING;
-                               }
+                               newtid = PTHREAD_MTX_TID_SWITCHING;
                                // need to signal others waiting for mutex
                                flags |= _PTHREAD_MTX_OPT_NOTIFY;
                        }
                                // need to signal others waiting for mutex
                                flags |= _PTHREAD_MTX_OPT_NOTIFY;
                        }
@@ -530,21 +682,12 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp,
 
                if (clearnotify || spurious) {
                        flags &= ~_PTHREAD_MTX_OPT_NOTIFY;
 
                if (clearnotify || spurious) {
                        flags &= ~_PTHREAD_MTX_OPT_NOTIFY;
-                       if (firstfit && (newseq.lgenval & PTH_RWL_PBIT)) {
-                               clearprepost = true;
-                               newseq.lgenval &= ~PTH_RWL_PBIT;
-                       }
                }
        } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release));
 
        PTHREAD_TRACE(psynch_mutex_unlock_updatebits, mutex, oldseq.lgenval,
                        newseq.lgenval, oldtid);
 
                }
        } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release));
 
        PTHREAD_TRACE(psynch_mutex_unlock_updatebits, mutex, oldseq.lgenval,
                        newseq.lgenval, oldtid);
 
-       if (clearprepost) {
-               __psynch_cvclrprepost(mutex, newseq.lgenval, newseq.ugenval, 0, 0,
-                               newseq.lgenval, flags | _PTHREAD_MTX_OPT_MUTEX);
-       }
-
        if (mgenp != NULL) {
                *mgenp = newseq.lgenval;
        }
        if (mgenp != NULL) {
                *mgenp = newseq.lgenval;
        }
@@ -561,20 +704,11 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp,
        return 0;
 }
 
        return 0;
 }
 
-PTHREAD_NOEXPORT PTHREAD_NOINLINE
-int
-_pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t *flagsp,
-               uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
-{
-       return _pthread_mutex_unlock_updatebits(mutex, flagsp, pmtxp, mgenp, ugenp);
-}
-
 PTHREAD_ALWAYS_INLINE
 static inline int
 PTHREAD_ALWAYS_INLINE
 static inline int
-_pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
+_pthread_mutex_fairshare_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
 {
 {
-       bool firstfit = (mutex->mtxopts.options.policy ==
-                       _PTHREAD_MUTEX_POLICY_FIRSTFIT);
+       bool firstfit = _pthread_mutex_is_firstfit(mutex);
        bool gotlock = true;
 
        mutex_seq *seqaddr;
        bool gotlock = true;
 
        mutex_seq *seqaddr;
@@ -585,11 +719,9 @@ _pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
 
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
 
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
-       uint64_t oldtid;
 
        do {
                newseq = oldseq;
 
        do {
                newseq = oldseq;
-               oldtid = os_atomic_load(tidaddr, relaxed);
 
                if (firstfit) {
                        // firstfit locks can have the lock stolen out from under a locker
 
                if (firstfit) {
                        // firstfit locks can have the lock stolen out from under a locker
@@ -605,17 +737,14 @@ _pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
 
                newseq.lgenval |= PTH_RWL_KBIT | PTH_RWL_EBIT;
        } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
 
                newseq.lgenval |= PTH_RWL_KBIT | PTH_RWL_EBIT;
        } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
-                       relaxed));
+                       acquire));
 
        if (gotlock) {
 
        if (gotlock) {
-               if (!os_atomic_cmpxchg(tidaddr, oldtid, selfid, relaxed)) {
-                       // we own this mutex, nobody should be updating it except us
-                       return _pthread_mutex_corruption_abort(mutex);
-               }
+               os_atomic_store(tidaddr, selfid, relaxed);
        }
 
        PTHREAD_TRACE(psynch_mutex_lock_updatebits, mutex, oldseq.lgenval,
        }
 
        PTHREAD_TRACE(psynch_mutex_lock_updatebits, mutex, oldseq.lgenval,
-                       newseq.lgenval, oldtid);
+                       newseq.lgenval, 0);
 
        // failing to take the lock in firstfit returns 1 to force the caller
        // to wait in the kernel
 
        // failing to take the lock in firstfit returns 1 to force the caller
        // to wait in the kernel
@@ -624,114 +753,36 @@ _pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
 
 PTHREAD_NOINLINE
 static int
 
 PTHREAD_NOINLINE
 static int
-_pthread_mutex_markprepost(_pthread_mutex *mutex, uint32_t updateval)
-{
-       mutex_seq *seqaddr;
-       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
-
-       mutex_seq oldseq, newseq;
-       mutex_seq_load(seqaddr, &oldseq);
-
-       bool clearprepost;
-       do {
-               clearprepost = false;
-               newseq = oldseq;
-
-               /* update the bits */
-               if ((oldseq.lgenval & PTHRW_COUNT_MASK) ==
-                               (oldseq.ugenval & PTHRW_COUNT_MASK)) {
-                       clearprepost = true;
-                       newseq.lgenval &= ~PTH_RWL_PBIT;
-               } else {
-                       newseq.lgenval |= PTH_RWL_PBIT;
-               }
-       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, relaxed));
-
-       if (clearprepost) {
-               __psynch_cvclrprepost(mutex, newseq.lgenval, newseq.ugenval, 0, 0,
-                               newseq.lgenval, mutex->mtxopts.value | _PTHREAD_MTX_OPT_MUTEX);
-       }
-
-       return 0;
-}
-
-PTHREAD_NOINLINE
-static int
-_pthread_mutex_check_init_slow(pthread_mutex_t *omutex)
-{
-       int res = EINVAL;
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-
-       if (_pthread_mutex_check_signature_init(mutex)) {
-               _PTHREAD_LOCK(mutex->lock);
-               if (_pthread_mutex_check_signature_init(mutex)) {
-                       // initialize a statically initialized mutex to provide
-                       // compatibility for misbehaving applications.
-                       // (unlock should not be the first operation on a mutex)
-                       res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf));
-               } else if (_pthread_mutex_check_signature(mutex)) {
-                       res = 0;
-               }
-               _PTHREAD_UNLOCK(mutex->lock);
-       } else if (_pthread_mutex_check_signature(mutex)) {
-               res = 0;
-       }
-       if (res != 0) {
-               PLOCKSTAT_MUTEX_ERROR(omutex, res);
-       }
-       return res;
-}
-
-PTHREAD_ALWAYS_INLINE
-static inline int
-_pthread_mutex_check_init(pthread_mutex_t *omutex)
-{
-       int res = 0;
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-
-       if (!_pthread_mutex_check_signature(mutex)) {
-               return _pthread_mutex_check_init_slow(omutex);
-       }
-       return res;
-}
-
-PTHREAD_NOINLINE
-static int
-_pthread_mutex_lock_wait(pthread_mutex_t *omutex, mutex_seq newseq,
+_pthread_mutex_fairshare_lock_wait(_pthread_mutex *mutex, mutex_seq newseq,
                uint64_t oldtid)
 {
                uint64_t oldtid)
 {
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t selfid = _pthread_selfid_direct();
 
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t selfid = _pthread_selfid_direct();
 
-       PLOCKSTAT_MUTEX_BLOCK(omutex);
+       PLOCKSTAT_MUTEX_BLOCK((pthread_mutex_t *)mutex);
        do {
                uint32_t updateval;
                do {
        do {
                uint32_t updateval;
                do {
-                       updateval = __psynch_mutexwait(omutex, newseq.lgenval,
+                       updateval = __psynch_mutexwait(mutex, newseq.lgenval,
                                        newseq.ugenval, oldtid, mutex->mtxopts.value);
                        oldtid = os_atomic_load(tidaddr, relaxed);
                } while (updateval == (uint32_t)-1);
 
                // returns 0 on succesful update; in firstfit it may fail with 1
                                        newseq.ugenval, oldtid, mutex->mtxopts.value);
                        oldtid = os_atomic_load(tidaddr, relaxed);
                } while (updateval == (uint32_t)-1);
 
                // returns 0 on succesful update; in firstfit it may fail with 1
-       } while (_pthread_mutex_lock_updatebits(mutex, selfid) == 1);
-       PLOCKSTAT_MUTEX_BLOCKED(omutex, BLOCK_SUCCESS_PLOCKSTAT);
+       } while (_pthread_mutex_fairshare_lock_updatebits(mutex, selfid) == 1);
+       PLOCKSTAT_MUTEX_BLOCKED((pthread_mutex_t *)mutex, BLOCK_SUCCESS_PLOCKSTAT);
 
        return 0;
 }
 
 PTHREAD_NOEXPORT PTHREAD_NOINLINE
 int
 
        return 0;
 }
 
 PTHREAD_NOEXPORT PTHREAD_NOINLINE
 int
-_pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock)
+_pthread_mutex_fairshare_lock_slow(_pthread_mutex *omutex, bool trylock)
 {
        int res, recursive = 0;
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
 
 {
        int res, recursive = 0;
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
 
-       res = _pthread_mutex_check_init(omutex);
-       if (res != 0) return res;
-
        mutex_seq *seqaddr;
        MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
 
        mutex_seq *seqaddr;
        MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
 
@@ -742,25 +793,14 @@ _pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock)
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t oldtid, selfid = _pthread_selfid_direct();
 
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t oldtid, selfid = _pthread_selfid_direct();
 
-       if (mutex->mtxopts.options.type != PTHREAD_MUTEX_NORMAL) {
-               if (os_atomic_load(tidaddr, relaxed) == selfid) {
-                       if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) {
-                               if (mutex->mtxopts.options.lock_count < USHRT_MAX) {
-                                       mutex->mtxopts.options.lock_count++;
-                                       recursive = 1;
-                                       res = 0;
-                               } else {
-                                       res = EAGAIN;
-                               }
-                       } else if (trylock) { /* PTHREAD_MUTEX_ERRORCHECK */
-                               // <rdar://problem/16261552> as per OpenGroup, trylock cannot
-                               // return EDEADLK on a deadlock, it should return EBUSY.
-                               res = EBUSY;
-                       } else  { /* PTHREAD_MUTEX_ERRORCHECK */
-                               res = EDEADLK;
-                       }
-                       goto out;
-               }
+       res = _pthread_mutex_lock_handle_options(mutex, trylock, tidaddr);
+       if (res > 0) {
+               recursive = 1;
+               res = 0;
+               goto out;
+       } else if (res < 0) {
+               res = -res;
+               goto out;
        }
 
        bool gotlock;
        }
 
        bool gotlock;
@@ -797,44 +837,39 @@ _pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock)
        } else {
                PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_START, omutex,
                                newseq.lgenval, newseq.ugenval, oldtid);
        } else {
                PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_START, omutex,
                                newseq.lgenval, newseq.ugenval, oldtid);
-               res = _pthread_mutex_lock_wait(omutex, newseq, oldtid);
+               res = _pthread_mutex_fairshare_lock_wait(mutex, newseq, oldtid);
                PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_END, omutex,
                                newseq.lgenval, newseq.ugenval, oldtid);
        }
 
                PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_END, omutex,
                                newseq.lgenval, newseq.ugenval, oldtid);
        }
 
-       if (res == 0 && mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) {
+       if (res == 0 && _pthread_mutex_is_recursive(mutex)) {
                mutex->mtxopts.options.lock_count = 1;
        }
 
 out:
 #if PLOCKSTAT
        if (res == 0) {
                mutex->mtxopts.options.lock_count = 1;
        }
 
 out:
 #if PLOCKSTAT
        if (res == 0) {
-               PLOCKSTAT_MUTEX_ACQUIRE(omutex, recursive, 0);
+               PLOCKSTAT_MUTEX_ACQUIRE((pthread_mutex_t *)mutex, recursive, 0);
        } else {
        } else {
-               PLOCKSTAT_MUTEX_ERROR(omutex, res);
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res);
        }
 #endif
 
        return res;
 }
 
        }
 #endif
 
        return res;
 }
 
-PTHREAD_ALWAYS_INLINE
+PTHREAD_NOINLINE
 static inline int
 static inline int
-_pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock)
+_pthread_mutex_fairshare_lock(_pthread_mutex *mutex, bool trylock)
 {
 #if ENABLE_USERSPACE_TRACE
 {
 #if ENABLE_USERSPACE_TRACE
-       return _pthread_mutex_lock_slow(omutex, trylock);
+       return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
 #elif PLOCKSTAT
        if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
 #elif PLOCKSTAT
        if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
-               return _pthread_mutex_lock_slow(omutex, trylock);
+               return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
        }
 #endif
 
        }
 #endif
 
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
-               return _pthread_mutex_lock_slow(omutex, trylock);
-       }
-
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t selfid = _pthread_selfid_direct();
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t selfid = _pthread_selfid_direct();
@@ -846,7 +881,7 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock)
        mutex_seq_load(seqaddr, &oldseq);
 
        if (os_unlikely(oldseq.lgenval & PTH_RWL_EBIT)) {
        mutex_seq_load(seqaddr, &oldseq);
 
        if (os_unlikely(oldseq.lgenval & PTH_RWL_EBIT)) {
-               return _pthread_mutex_lock_slow(omutex, trylock);
+               return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
        }
 
        bool gotlock;
        }
 
        bool gotlock;
@@ -865,7 +900,7 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock)
                        newseq.lgenval += PTHRW_INC;
                        newseq.lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT;
                } else {
                        newseq.lgenval += PTHRW_INC;
                        newseq.lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT;
                } else {
-                       return _pthread_mutex_lock_slow(omutex, trylock);
+                       return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
                }
        } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
                        acquire)));
                }
        } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
                        acquire)));
@@ -880,45 +915,24 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock)
        }
 }
 
        }
 }
 
-PTHREAD_NOEXPORT_VARIANT
-int
-pthread_mutex_lock(pthread_mutex_t *mutex)
-{
-       return _pthread_mutex_lock(mutex, false);
-}
-
-PTHREAD_NOEXPORT_VARIANT
-int
-pthread_mutex_trylock(pthread_mutex_t *mutex)
-{
-       return _pthread_mutex_lock(mutex, true);
-}
-
-/*
- * Unlock a mutex.
- * TODO: Priority inheritance stuff
- */
-
 PTHREAD_NOINLINE
 static int
 PTHREAD_NOINLINE
 static int
-_pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq,
+_pthread_mutex_fairshare_unlock_drop(_pthread_mutex *mutex, mutex_seq newseq,
                uint32_t flags)
 {
        int res;
                uint32_t flags)
 {
        int res;
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-
        uint32_t updateval;
 
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
 
        uint32_t updateval;
 
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
 
-       PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_START, omutex, newseq.lgenval,
+       PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_START, mutex, newseq.lgenval,
                        newseq.ugenval, os_atomic_load(tidaddr, relaxed));
 
                        newseq.ugenval, os_atomic_load(tidaddr, relaxed));
 
-       updateval = __psynch_mutexdrop(omutex, newseq.lgenval, newseq.ugenval,
+       updateval = __psynch_mutexdrop(mutex, newseq.lgenval, newseq.ugenval,
                        os_atomic_load(tidaddr, relaxed), flags);
 
                        os_atomic_load(tidaddr, relaxed), flags);
 
-       PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_END, omutex, updateval, 0, 0);
+       PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_END, mutex, updateval, 0, 0);
 
        if (updateval == (uint32_t)-1) {
                res = errno;
 
        if (updateval == (uint32_t)-1) {
                res = errno;
@@ -930,9 +944,6 @@ _pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq,
                        PTHREAD_ABORT("__psynch_mutexdrop failed with error %d", res);
                }
                return res;
                        PTHREAD_ABORT("__psynch_mutexdrop failed with error %d", res);
                }
                return res;
-       } else if ((mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT)
-                       && (updateval & PTH_RWL_PBIT)) {
-               return _pthread_mutex_markprepost(mutex, updateval);
        }
 
        return 0;
        }
 
        return 0;
@@ -940,49 +951,39 @@ _pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq,
 
 PTHREAD_NOEXPORT PTHREAD_NOINLINE
 int
 
 PTHREAD_NOEXPORT PTHREAD_NOINLINE
 int
-_pthread_mutex_unlock_slow(pthread_mutex_t *omutex)
+_pthread_mutex_fairshare_unlock_slow(_pthread_mutex *mutex)
 {
        int res;
 {
        int res;
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
        mutex_seq newseq;
        uint32_t flags;
 
        mutex_seq newseq;
        uint32_t flags;
 
-       // Initialize static mutexes for compatibility with misbehaving
-       // applications (unlock should not be the first operation on a mutex).
-       res = _pthread_mutex_check_init(omutex);
-       if (res != 0) return res;
-
-       res = _pthread_mutex_unlock_updatebits(mutex, &flags, NULL, &newseq.lgenval,
-                       &newseq.ugenval);
+       res = _pthread_mutex_fairshare_unlock_updatebits(mutex, &flags, NULL,
+                       &newseq.lgenval, &newseq.ugenval);
        if (res != 0) return res;
 
        if ((flags & _PTHREAD_MTX_OPT_NOTIFY) != 0) {
        if (res != 0) return res;
 
        if ((flags & _PTHREAD_MTX_OPT_NOTIFY) != 0) {
-               return _pthread_mutex_unlock_drop(omutex, newseq, flags);
+               return _pthread_mutex_fairshare_unlock_drop(mutex, newseq, flags);
        } else {
                uint64_t *tidaddr;
                MUTEX_GETTID_ADDR(mutex, &tidaddr);
        } else {
                uint64_t *tidaddr;
                MUTEX_GETTID_ADDR(mutex, &tidaddr);
-               PTHREAD_TRACE(psynch_mutex_uunlock, omutex, newseq.lgenval,
+               PTHREAD_TRACE(psynch_mutex_uunlock, mutex, newseq.lgenval,
                                newseq.ugenval, os_atomic_load(tidaddr, relaxed));
        }
 
        return 0;
 }
 
                                newseq.ugenval, os_atomic_load(tidaddr, relaxed));
        }
 
        return 0;
 }
 
-PTHREAD_NOEXPORT_VARIANT
-int
-pthread_mutex_unlock(pthread_mutex_t *omutex)
+PTHREAD_NOINLINE
+static int
+_pthread_mutex_fairshare_unlock(_pthread_mutex *mutex)
 {
 #if ENABLE_USERSPACE_TRACE
 {
 #if ENABLE_USERSPACE_TRACE
-       return _pthread_mutex_unlock_slow(omutex);
+       return _pthread_mutex_fairshare_unlock_slow(mutex);
 #elif PLOCKSTAT
        if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
 #elif PLOCKSTAT
        if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
-               return _pthread_mutex_unlock_slow(omutex);
+               return _pthread_mutex_fairshare_unlock_slow(mutex);
        }
 #endif
        }
 #endif
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
-               return _pthread_mutex_unlock_slow(omutex);
-       }
 
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
 
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
@@ -1012,13 +1013,15 @@ pthread_mutex_unlock(pthread_mutex_t *omutex)
 
                if (os_likely((oldseq.lgenval & PTHRW_COUNT_MASK) ==
                                (newseq.ugenval & PTHRW_COUNT_MASK))) {
 
                if (os_likely((oldseq.lgenval & PTHRW_COUNT_MASK) ==
                                (newseq.ugenval & PTHRW_COUNT_MASK))) {
-                       // our unlock sequence matches to lock sequence, so if the
-                       // CAS is successful, the mutex is unlocked
+                       // if we succeed in performing the CAS we can be sure of a fast
+                       // path (only needing the CAS) unlock, if:
+                       //   a. our lock and unlock sequence are equal
+                       //   b. we don't need to clear an unlock prepost from the kernel
 
                        // do not reset Ibit, just K&E
                        newseq.lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT);
                } else {
 
                        // do not reset Ibit, just K&E
                        newseq.lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT);
                } else {
-                       return _pthread_mutex_unlock_slow(omutex);
+                       return _pthread_mutex_fairshare_unlock_slow(mutex);
                }
        } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
                        release)));
                }
        } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
                        release)));
@@ -1026,6 +1029,468 @@ pthread_mutex_unlock(pthread_mutex_t *omutex)
        return 0;
 }
 
        return 0;
 }
 
+#pragma mark firstfit
+
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_firstfit_unlock_updatebits(_pthread_mutex *mutex,
+               uint32_t *flagsp, uint32_t **mutexp, uint32_t *lvalp, uint32_t *uvalp)
+{
+       uint32_t flags = mutex->mtxopts.value & ~_PTHREAD_MTX_OPT_NOTIFY;
+       bool kernel_wake;
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t oldtid;
+
+       int res = _pthread_mutex_unlock_handle_options(mutex, tidaddr);
+       if (res > 0) {
+               // Valid recursive unlock
+               if (flagsp) {
+                       *flagsp = flags;
+               }
+               PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1);
+               return 0;
+       } else if (res < 0) {
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, -res);
+               return -res;
+       }
+
+       do {
+               newseq = oldseq;
+               oldtid = os_atomic_load(tidaddr, relaxed);
+               // More than one kernel waiter means we need to do a wake.
+               kernel_wake = diff_genseq(oldseq.lgenval, oldseq.ugenval) > 0;
+               newseq.lgenval &= ~PTH_RWL_EBIT;
+
+               if (kernel_wake) {
+                       // Going to the kernel post-unlock removes a single waiter unlock
+                       // from the mutex counts.
+                       newseq.ugenval += PTHRW_INC;
+               }
+
+               if (oldtid != 0) {
+                       if (!os_atomic_cmpxchg(tidaddr, oldtid, 0, relaxed)) {
+                               return _pthread_mutex_corruption_abort(mutex);
+                       }
+               }
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release));
+
+       PTHREAD_TRACE(psynch_ffmutex_unlock_updatebits, mutex, oldseq.lgenval,
+                       newseq.lgenval, newseq.ugenval);
+
+       if (kernel_wake) {
+               // We choose to return this out via flags because the condition
+               // variable also uses this to determine whether to do a kernel wake
+               // when beginning a cvwait.
+               flags |= _PTHREAD_MTX_OPT_NOTIFY;
+       }
+       if (lvalp) {
+               *lvalp = newseq.lgenval;
+       }
+       if (uvalp) {
+               *uvalp = newseq.ugenval;
+       }
+       if (mutexp) {
+               *mutexp = (uint32_t *)mutex;
+       }
+       if (flagsp) {
+               *flagsp = flags;
+       }
+       return 0;
+}
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+static int
+_pthread_mutex_firstfit_wake(_pthread_mutex *mutex, mutex_seq newseq,
+               uint32_t flags)
+{
+       PTHREAD_TRACE(psynch_ffmutex_wake, mutex, newseq.lgenval, newseq.ugenval,
+                       0);
+       int res = __psynch_mutexdrop(mutex, newseq.lgenval, newseq.ugenval, 0,
+                       flags);
+
+       if (res == -1) {
+               res = errno;
+               if (res == EINTR) {
+                       res = 0;
+               }
+               if (res != 0) {
+                       PTHREAD_ABORT("__psynch_mutexdrop failed with error %d", res);
+               }
+               return res;
+       }
+       return 0;
+}
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_firstfit_unlock_slow(_pthread_mutex *mutex)
+{
+       mutex_seq newseq;
+       uint32_t flags;
+       int res;
+
+       res = _pthread_mutex_firstfit_unlock_updatebits(mutex, &flags, NULL,
+                       &newseq.lgenval, &newseq.ugenval);
+       if (res != 0) return res;
+
+       if (flags & _PTHREAD_MTX_OPT_NOTIFY) {
+               return _pthread_mutex_firstfit_wake(mutex, newseq, flags);
+       }
+       return 0;
+}
+
+PTHREAD_ALWAYS_INLINE
+static bool
+_pthread_mutex_firstfit_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid,
+               mutex_seq *newseqp)
+{
+       bool gotlock;
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+
+       PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_START, mutex,
+                       oldseq.lgenval, oldseq.ugenval, 0);
+
+       do {
+               newseq = oldseq;
+               gotlock = is_rwl_ebit_clear(oldseq.lgenval);
+
+               if (gotlock) {
+                       // If we see the E-bit cleared, we should just attempt to take it.
+                       newseq.lgenval |= PTH_RWL_EBIT;
+               } else {
+                       // If we failed to get the lock then we need to put ourselves back
+                       // in the queue of waiters. The previous unlocker that woke us out
+                       // of the kernel consumed the S-count for our previous wake. So
+                       // take another ticket on L and go back in the kernel to sleep.
+                       newseq.lgenval += PTHRW_INC;
+               }
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire));
+
+       if (gotlock) {
+               os_atomic_store(tidaddr, selfid, relaxed);
+       }
+
+       PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_END, mutex,
+                       newseq.lgenval, newseq.ugenval, 0);
+
+       if (newseqp) {
+               *newseqp = newseq;
+       }
+       return gotlock;
+}
+
+PTHREAD_NOINLINE
+static int
+_pthread_mutex_firstfit_lock_wait(_pthread_mutex *mutex, mutex_seq newseq,
+               uint64_t oldtid)
+{
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t selfid = _pthread_selfid_direct();
+
+       PLOCKSTAT_MUTEX_BLOCK((pthread_mutex_t *)mutex);
+       do {
+               uint32_t uval;
+               do {
+                       PTHREAD_TRACE(psynch_ffmutex_wait | DBG_FUNC_START, mutex,
+                                       newseq.lgenval, newseq.ugenval, mutex->mtxopts.value);
+                       uval = __psynch_mutexwait(mutex, newseq.lgenval, newseq.ugenval,
+                                       oldtid, mutex->mtxopts.value);
+                       PTHREAD_TRACE(psynch_ffmutex_wait | DBG_FUNC_END, mutex,
+                                       uval, 0, 0);
+                       oldtid = os_atomic_load(tidaddr, relaxed);
+               } while (uval == (uint32_t)-1);
+       } while (!_pthread_mutex_firstfit_lock_updatebits(mutex, selfid, &newseq));
+       PLOCKSTAT_MUTEX_BLOCKED((pthread_mutex_t *)mutex, BLOCK_SUCCESS_PLOCKSTAT);
+
+       return 0;
+}
+
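+// Editor's note (not part of this change): a first-fit wake is not a lock
+// handoff. The woken waiter above still races fresh lockers for the E-bit;
+// when it loses, _pthread_mutex_firstfit_lock_updatebits() takes a fresh
+// ticket on L and the outer loop parks the thread in the kernel again.
+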
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_firstfit_lock_slow(_pthread_mutex *mutex, bool trylock)
+{
+       int res, recursive = 0;
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t oldtid, selfid = _pthread_selfid_direct();
+
+       res = _pthread_mutex_lock_handle_options(mutex, trylock, tidaddr);
+       if (res > 0) {
+               recursive = 1;
+               res = 0;
+               goto out;
+       } else if (res < 0) {
+               res = -res;
+               goto out;
+       }
+
+       PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_START, mutex,
+                       oldseq.lgenval, oldseq.ugenval, 0);
+
+       bool gotlock;
+       do {
+               newseq = oldseq;
+               oldtid = os_atomic_load(tidaddr, relaxed);
+
+               gotlock = is_rwl_ebit_clear(oldseq.lgenval);
+               if (trylock && !gotlock) {
+                       // We still want to perform the CAS here, even though it doesn't
+                       // change anything, so that it fails if someone unlocked while
+                       // we were in the loop.
+               } else if (gotlock) {
+                       // In first-fit, getting the lock simply adds the E-bit
+                       newseq.lgenval |= PTH_RWL_EBIT;
+               } else {
+                       // Failed to get the lock, increment the L-val and go to
+                       // the kernel to sleep
+                       newseq.lgenval += PTHRW_INC;
+               }
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire));
+
+       PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_END, mutex,
+                       newseq.lgenval, newseq.ugenval, 0);
+
+       if (gotlock) {
+               os_atomic_store(tidaddr, selfid, relaxed);
+               res = 0;
+               PTHREAD_TRACE(psynch_mutex_ulock, mutex, newseq.lgenval,
+                               newseq.ugenval, selfid);
+       } else if (trylock) {
+               res = EBUSY;
+               PTHREAD_TRACE(psynch_mutex_utrylock_failed, mutex, newseq.lgenval,
+                               newseq.ugenval, oldtid);
+       } else {
+               PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_START, mutex,
+                               newseq.lgenval, newseq.ugenval, oldtid);
+               res = _pthread_mutex_firstfit_lock_wait(mutex, newseq, oldtid);
+               PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_END, mutex,
+                               newseq.lgenval, newseq.ugenval, oldtid);
+       }
+
+       if (res == 0 && _pthread_mutex_is_recursive(mutex)) {
+               mutex->mtxopts.options.lock_count = 1;
+       }
+
+out:
+#if PLOCKSTAT
+       if (res == 0) {
+               PLOCKSTAT_MUTEX_ACQUIRE((pthread_mutex_t *)mutex, recursive, 0);
+       } else {
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res);
+       }
+#endif
+       return res;
+}
+
+#pragma mark fast path
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t *flagsp,
+               uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
+{
+       if (_pthread_mutex_is_fairshare(mutex)) {
+               return _pthread_mutex_fairshare_unlock_updatebits(mutex, flagsp,
+                               pmtxp, mgenp, ugenp);
+       }
+       return _pthread_mutex_firstfit_unlock_updatebits(mutex, flagsp, pmtxp,
+                       mgenp, ugenp);
+}
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_lock_init_slow(_pthread_mutex *mutex, bool trylock)
+{
+       int res;
+
+       res = _pthread_mutex_check_init(mutex);
+       if (res != 0) return res;
+
+       if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+               return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
+       }
+       return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+}
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+static int
+_pthread_mutex_unlock_init_slow(_pthread_mutex *mutex)
+{
+       int res;
+
+       // Initialize static mutexes for compatibility with misbehaving
+       // applications (unlock should not be the first operation on a mutex).
+       res = _pthread_mutex_check_init(mutex);
+       if (res != 0) return res;
+
+       if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+               return _pthread_mutex_fairshare_unlock_slow(mutex);
+       }
+       return _pthread_mutex_firstfit_unlock_slow(mutex);
+}
+
+PTHREAD_NOEXPORT_VARIANT
+int
+pthread_mutex_unlock(pthread_mutex_t *omutex)
+{
+       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
+       if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
+               return _pthread_mutex_unlock_init_slow(mutex);
+       }
+
+       if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+               return _pthread_mutex_fairshare_unlock(mutex);
+       }
+
+#if ENABLE_USERSPACE_TRACE
+       return _pthread_mutex_firstfit_unlock_slow(mutex);
+#elif PLOCKSTAT
+       if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
+               return _pthread_mutex_firstfit_unlock_slow(mutex);
+       }
+#endif
+
+       /*
+        * This is the first-fit fast path. The fairshare fast-ish path is in
+        * _pthread_mutex_fairshare_unlock()
+        */
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       // We're giving up the mutex one way or the other, so go ahead and
+       // update the owner to 0 so that once the CAS below succeeds, there
+       // is no stale ownership information. If the CAS of the seqaddr
+       // fails, we may loop, but it's still valid for the owner to be
+       // SWITCHING/0
+       os_atomic_store(tidaddr, 0, relaxed);
+
+       do {
+               newseq = oldseq;
+
+               if (diff_genseq(oldseq.lgenval, oldseq.ugenval) == 0) {
+                       // No outstanding waiters in kernel, we can simply drop the E-bit
+                       // and return.
+                       newseq.lgenval &= ~PTH_RWL_EBIT;
+               } else {
+                       return _pthread_mutex_firstfit_unlock_slow(mutex);
+               }
+       } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+                       release)));
+
+       return 0;
+}
+
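+// Editor's note (illustrative, not part of this change): the fast unlock can
+// only retire the lock while L == U, i.e. while no waiter ticket is
+// outstanding. Assuming PTHRW_INC == 0x100:
+//
+//     uncontended:  L == U          -> clear PTH_RWL_EBIT here, no syscall
+//     contended:    L == U + 0x100  -> take the slow path, which wakes one
+//                                      waiter via __psynch_mutexdrop()
+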
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_firstfit_lock(pthread_mutex_t *omutex, bool trylock)
+{
+       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
+       if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
+               return _pthread_mutex_lock_init_slow(mutex, trylock);
+       }
+
+       if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+               return _pthread_mutex_fairshare_lock(mutex, trylock);
+       }
+
+#if ENABLE_USERSPACE_TRACE
+       return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+#elif PLOCKSTAT
+       if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
+               return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+       }
+#endif
+
+       /*
+        * This is the first-fit fast path. The fairshare fast-ish path is in
+        * _pthread_mutex_fairshare_lock()
+        */
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t selfid = _pthread_selfid_direct();
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       if (os_unlikely(oldseq.lgenval & PTH_RWL_EBIT)) {
+               return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+       }
+
+       bool gotlock;
+       do {
+               newseq = oldseq;
+               gotlock = is_rwl_ebit_clear(oldseq.lgenval);
+
+               if (trylock && !gotlock) {
+                       // A trylock on a held lock will fail immediately. But since
+                       // we did not load the sequence words atomically, perform a
+                       // no-op CAS64 to ensure that nobody has unlocked concurrently.
+               } else if (os_likely(gotlock)) {
+                       // In first-fit, getting the lock simply adds the E-bit
+                       newseq.lgenval |= PTH_RWL_EBIT;
+               } else {
+                       return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+               }
+       } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+                       acquire)));
+
+       if (os_likely(gotlock)) {
+               os_atomic_store(tidaddr, selfid, relaxed);
+               return 0;
+       } else if (trylock) {
+               return EBUSY;
+       } else {
+               __builtin_trap();
+       }
+}
+
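+// Editor's note (not part of this change): the __builtin_trap() above is
+// unreachable by construction: if gotlock is false and this is not a trylock,
+// the loop body has already returned into the slow path before the CAS. The
+// trap only documents and enforces that invariant.
+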
+PTHREAD_NOEXPORT_VARIANT
+int
+pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+       return _pthread_mutex_firstfit_lock(mutex, false);
+}
+
+PTHREAD_NOEXPORT_VARIANT
+int
+pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+       return _pthread_mutex_firstfit_lock(mutex, true);
+}
+
 
 PTHREAD_ALWAYS_INLINE
 static inline int
@@ -1040,7 +1505,7 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr,
                }
                mutex->prioceiling = (int16_t)attr->prioceiling;
                mutex->mtxopts.options.protocol = attr->protocol;
-               mutex->mtxopts.options.policy = attr->policy;
+               mutex->mtxopts.options.policy = attr->opt;
                mutex->mtxopts.options.type = attr->type;
                mutex->mtxopts.options.pshared = attr->pshared;
        } else {
@@ -1063,9 +1528,9 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr,
                mutex->prioceiling = _PTHREAD_DEFAULT_PRIOCEILING;
                mutex->mtxopts.options.protocol = _PTHREAD_DEFAULT_PROTOCOL;
                if (static_type != 3) {
-                       mutex->mtxopts.options.policy = __pthread_mutex_default_policy;
+                       mutex->mtxopts.options.policy = __pthread_mutex_default_opt_policy;
                } else {
-                       mutex->mtxopts.options.policy = _PTHREAD_MUTEX_POLICY_FIRSTFIT;
+                       mutex->mtxopts.options.policy = _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
                }
                mutex->mtxopts.options.pshared = _PTHREAD_DEFAULT_PSHARED;
        }
@@ -1089,7 +1554,8 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr,
 
        long sig = _PTHREAD_MUTEX_SIG;
        if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL &&
-                       mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FAIRSHARE) {
+                       (_pthread_mutex_is_fairshare(mutex) ||
+                        _pthread_mutex_is_firstfit(mutex))) {
                // rdar://18148854 _pthread_mutex_lock & pthread_mutex_unlock fastpath
                sig = _PTHREAD_MUTEX_SIG_fast;
        }
index 85358df6e7fb8d91376f6fcf63ce1e98411d2612..5b0bc9a70916ccf5a155683294dcbed23f0fd301 100644 (file)
@@ -61,8 +61,6 @@
 #include <platform/compat.h> // for bzero
 #endif
 
-extern int __unix_conforming;
-
 #ifdef PLOCKSTAT
 #include "plockstat.h"
 #else /* !PLOCKSTAT */
@@ -513,7 +511,7 @@ _pthread_rwlock_updateval(_pthread_rwlock *rwlock, uint32_t updateval)
        rwlock_seq_load(seqaddr, &oldseq, RWLOCK_SEQ_LS);
        do {
                newseq = oldseq;
-               if (isoverlap || is_rws_setunlockinit(oldseq.rw_seq) != 0) {
+               if (isoverlap || is_rws_unlockinit_set(oldseq.rw_seq)) {
                        // Set S word to the specified value
                        uint32_t savebits = (oldseq.rw_seq & PTHRW_RWS_SAVEMASK);
                        newseq.lcntval = _pthread_rwlock_modbits(oldseq.lcntval, updateval,
@@ -763,7 +761,7 @@ retry:
                                newseq.lcntval |= PTH_RWL_KBIT | PTH_RWL_WBIT;
                        }
                        newseq.lcntval += PTHRW_INC;
-                       if (is_rws_setseq(oldseq.rw_seq)) {
+                       if (is_rws_sbit_set(oldseq.rw_seq)) {
                                // Clear the S bit and set S to L
                                newseq.rw_seq &= (PTHRW_BIT_MASK & ~PTH_RWS_SBIT);
                                newseq.rw_seq |= (oldseq.lcntval & PTHRW_COUNT_MASK);
index 3a772668ee81d3ae292dd0afe4cdb94da7bf4e68..54b1bb020d060eedc48e723f994af6a0c49e81e1 100644 (file)
 // __pthread_tsd_end is the end of dynamic keys.
 
 static const int __pthread_tsd_first = __TSD_RESERVED_MAX + 1;
-static int __pthread_tsd_max = __pthread_tsd_first;
 static const int __pthread_tsd_start = _INTERNAL_POSIX_THREAD_KEYS_MAX;
 static const int __pthread_tsd_end = _INTERNAL_POSIX_THREAD_KEYS_END;
 
-static int __pthread_key_legacy_behaviour = 0;
-static int __pthread_key_legacy_behaviour_log = 0;
+static int __pthread_tsd_max = __pthread_tsd_first;
+static _pthread_lock __pthread_tsd_lock = _PTHREAD_LOCK_INITIALIZER;
+static bool __pthread_key_legacy_behaviour = 0;
+static bool __pthread_key_legacy_behaviour_log = 0;
 
 // Omit support for pthread key destructors in the static archive for dyld.
 // dyld does not create and destroy threads so these are not necessary.
@@ -80,15 +81,17 @@ static struct {
        uintptr_t destructor;
 } _pthread_keys[_INTERNAL_POSIX_THREAD_KEYS_END];
 
-static _pthread_lock tsd_lock = _PTHREAD_LOCK_INITIALIZER;
-
 // The pthread_tsd destruction order can be reverted to the old (pre-10.11) order
 // by setting this environment variable.
 void
 _pthread_key_global_init(const char *envp[])
 {
-       __pthread_key_legacy_behaviour = _simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER") ? 1 : 0;
-       __pthread_key_legacy_behaviour_log = _simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER_LOG") ? 1 : 0;
+       if (_simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER")) {
+               __pthread_key_legacy_behaviour = true;
+       }
+       if (_simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER_LOG")) {
+               __pthread_key_legacy_behaviour_log = true;
+       }
 }
 
 // Returns true if successful, false if destructor was already set.
@@ -133,7 +136,7 @@ pthread_key_create(pthread_key_t *key, void (*destructor)(void *))
        int res = EAGAIN; // Returns EAGAIN if key cannot be allocated.
        pthread_key_t k;
 
-       _PTHREAD_LOCK(tsd_lock);
+       _PTHREAD_LOCK(__pthread_tsd_lock);
        for (k = __pthread_tsd_start; k < __pthread_tsd_end; k++) {
                if (_pthread_key_set_destructor(k, destructor)) {
                        *key = k;
@@ -141,7 +144,7 @@ pthread_key_create(pthread_key_t *key, void (*destructor)(void *))
                        break;
                }
        }
-       _PTHREAD_UNLOCK(tsd_lock);
+       _PTHREAD_UNLOCK(__pthread_tsd_lock);
 
        return res;
 }
@@ -151,12 +154,12 @@ pthread_key_delete(pthread_key_t key)
 {
        int res = EINVAL; // Returns EINVAL if key is not allocated.
 
-       _PTHREAD_LOCK(tsd_lock);
+       _PTHREAD_LOCK(__pthread_tsd_lock);
        if (key >= __pthread_tsd_start && key < __pthread_tsd_end) {
                if (_pthread_key_unset_destructor(key)) {
                        struct _pthread *p;
                        _PTHREAD_LOCK(_pthread_list_lock);
-                       TAILQ_FOREACH(p, &__pthread_head, plist) {
+                       TAILQ_FOREACH(p, &__pthread_head, tl_plist) {
                                // No lock for word-sized write.
                                p->tsd[key] = 0;
                        }
@@ -164,7 +167,7 @@ pthread_key_delete(pthread_key_t key)
                        res = 0;
                }
        }
-       _PTHREAD_UNLOCK(tsd_lock);
+       _PTHREAD_UNLOCK(__pthread_tsd_lock);
 
        return res;
 }
@@ -188,7 +191,7 @@ pthread_setspecific(pthread_key_t key, const void *value)
                                _pthread_key_set_destructor(key, NULL);
                        }
                        if (key > self->max_tsd_key) {
-                               self->max_tsd_key = (int)key;
+                               self->max_tsd_key = (uint16_t)key;
                        }
                }
        }
@@ -342,12 +345,12 @@ pthread_key_init_np(int key, void (*destructor)(void *))
 {
        int res = EINVAL; // Returns EINVAL if key is out of range.
        if (key >= __pthread_tsd_first && key < __pthread_tsd_start) {
-               _PTHREAD_LOCK(tsd_lock);
+               _PTHREAD_LOCK(__pthread_tsd_lock);
                _pthread_key_set_destructor(key, destructor);
                if (key > __pthread_tsd_max) {
                        __pthread_tsd_max = key;
                }
-               _PTHREAD_UNLOCK(tsd_lock);
+               _PTHREAD_UNLOCK(__pthread_tsd_lock);
                res = 0;
        }
        return res;
index b31098a3d825a2a24584f1a8062e12be1647340c..ef360896e694091bf3b2d021a3132c6d26d53d7e 100644 (file)
--- a/src/qos.c
+++ b/src/qos.c
@@ -35,8 +35,6 @@
 #include "workqueue_private.h"
 #include "qos_private.h"
 
-static pthread_priority_t _main_qos = QOS_CLASS_UNSPECIFIED;
-
 #define PTHREAD_OVERRIDE_SIGNATURE     (0x6f766572)
 #define PTHREAD_OVERRIDE_SIG_DEAD      (0x7265766f)
 
@@ -49,172 +47,145 @@ struct pthread_override_s
        bool malloced;
 };
 
-void
-_pthread_set_main_qos(pthread_priority_t qos)
+thread_qos_t
+_pthread_qos_class_to_thread_qos(qos_class_t qos)
 {
-       _main_qos = qos;
+       switch (qos) {
+       case QOS_CLASS_USER_INTERACTIVE: return THREAD_QOS_USER_INTERACTIVE;
+       case QOS_CLASS_USER_INITIATED: return THREAD_QOS_USER_INITIATED;
+       case QOS_CLASS_DEFAULT: return THREAD_QOS_LEGACY;
+       case QOS_CLASS_UTILITY: return THREAD_QOS_UTILITY;
+       case QOS_CLASS_BACKGROUND: return THREAD_QOS_BACKGROUND;
+       case QOS_CLASS_MAINTENANCE: return THREAD_QOS_MAINTENANCE;
+       default: return THREAD_QOS_UNSPECIFIED;
+       }
 }
 
-int
-pthread_attr_set_qos_class_np(pthread_attr_t *__attr,
-                                                         qos_class_t __qos_class,
-                                                         int __relative_priority)
-{
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return ENOTSUP;
-       }
+static inline qos_class_t
+_pthread_qos_class_from_thread_qos(thread_qos_t tqos)
+{
+       static const qos_class_t thread_qos_to_qos_class[THREAD_QOS_LAST] = {
+               [THREAD_QOS_UNSPECIFIED]      = QOS_CLASS_UNSPECIFIED,
+               [THREAD_QOS_MAINTENANCE]      = QOS_CLASS_MAINTENANCE,
+               [THREAD_QOS_BACKGROUND]       = QOS_CLASS_BACKGROUND,
+               [THREAD_QOS_UTILITY]          = QOS_CLASS_UTILITY,
+               [THREAD_QOS_LEGACY]           = QOS_CLASS_DEFAULT,
+               [THREAD_QOS_USER_INITIATED]   = QOS_CLASS_USER_INITIATED,
+               [THREAD_QOS_USER_INTERACTIVE] = QOS_CLASS_USER_INTERACTIVE,
+       };
+       if (os_unlikely(tqos >= THREAD_QOS_LAST)) return QOS_CLASS_UNSPECIFIED;
+       return thread_qos_to_qos_class[tqos];
+}
 
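+// Editor's note (illustrative, not part of this change): the table above is
+// the inverse of _pthread_qos_class_to_thread_qos() for every class it
+// covers, so a round trip through the kernel representation is the identity:
+//
+//     assert(_pthread_qos_class_from_thread_qos(
+//                     _pthread_qos_class_to_thread_qos(QOS_CLASS_UTILITY)) ==
+//                     QOS_CLASS_UTILITY);
+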
-       if (__relative_priority > 0 || __relative_priority < QOS_MIN_RELATIVE_PRIORITY) {
-               return EINVAL;
+static inline thread_qos_t
+_pthread_validate_qos_class_and_relpri(qos_class_t qc, int relpri)
+{
+       if (relpri > 0 || relpri < QOS_MIN_RELATIVE_PRIORITY) {
+               return THREAD_QOS_UNSPECIFIED;
        }
+       return _pthread_qos_class_to_thread_qos(qc);
+}
 
-       int ret = EINVAL;
-       if (__attr->sig == _PTHREAD_ATTR_SIG) {
-               if (!__attr->schedset) {
-                       __attr->qosclass = _pthread_priority_make_newest(__qos_class, __relative_priority, 0);
-                       __attr->qosset = 1;
-                       ret = 0;
-               }
-       }
+static inline void
+_pthread_priority_split(pthread_priority_t pp, qos_class_t *qc, int *relpri)
+{
+       thread_qos_t qos = _pthread_priority_thread_qos(pp);
+       if (qc) *qc = _pthread_qos_class_from_thread_qos(qos);
+       if (relpri) *relpri = _pthread_priority_relpri(pp);
+}
 
-       return ret;
+void
+_pthread_set_main_qos(pthread_priority_t qos)
+{
+       _main_qos = (uint32_t)qos;
 }
 
 int
-pthread_attr_get_qos_class_np(pthread_attr_t * __restrict __attr,
-                                                         qos_class_t * __restrict __qos_class,
-                                                         int * __restrict __relative_priority)
+pthread_attr_set_qos_class_np(pthread_attr_t *attr, qos_class_t qc, int relpri)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return ENOTSUP;
+       thread_qos_t qos = _pthread_validate_qos_class_and_relpri(qc, relpri);
+       if (attr->sig != _PTHREAD_ATTR_SIG || attr->schedset) {
+               return EINVAL;
        }
 
-       int ret = EINVAL;
-       if (__attr->sig == _PTHREAD_ATTR_SIG) {
-               if (__attr->qosset) {
-                       qos_class_t qos; int relpri;
-                       _pthread_priority_split_newest(__attr->qosclass, qos, relpri);
+       attr->qosclass = _pthread_priority_make_from_thread_qos(qos, relpri, 0);
+       attr->qosset = 1;
+       attr->schedset = 0;
+       return 0;
+}
 
-                       if (__qos_class) { *__qos_class = qos; }
-                       if (__relative_priority) { *__relative_priority = relpri; }
-               } else {
-                       if (__qos_class) { *__qos_class = 0; }
-                       if (__relative_priority) { *__relative_priority = 0; }
-               }
-               ret = 0;
+int
+pthread_attr_get_qos_class_np(pthread_attr_t *attr, qos_class_t *qc, int *relpri)
+{
+       if (attr->sig != _PTHREAD_ATTR_SIG) {
+               return EINVAL;
        }
 
-       return ret;
+       _pthread_priority_split(attr->qosset ? attr->qosclass : 0, qc, relpri);
+       return 0;
 }
 
 int
-pthread_set_qos_class_self_np(qos_class_t __qos_class,
-                                                         int __relative_priority)
+pthread_set_qos_class_self_np(qos_class_t qc, int relpri)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return ENOTSUP;
-       }
-
-       if (__relative_priority > 0 || __relative_priority < QOS_MIN_RELATIVE_PRIORITY) {
+       thread_qos_t qos = _pthread_validate_qos_class_and_relpri(qc, relpri);
+       if (!qos) {
                return EINVAL;
        }
 
-       pthread_priority_t priority = _pthread_priority_make_newest(__qos_class, __relative_priority, 0);
-
-       if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) {
-               return _pthread_set_properties_self(_PTHREAD_SET_SELF_QOS_FLAG, priority, 0);
-       } else {
-               /* We set the thread QoS class in the TSD and then call into the kernel to
-                * read the value out of it and set the QoS class.
-                */
-               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority);
-               mach_port_t kport = _pthread_kernel_thread(pthread_self());
-               int res = __bsdthread_ctl(BSDTHREAD_CTL_SET_QOS, kport, &pthread_self()->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS], 0);
-
-               if (res == -1) {
-                       res = errno;
-               }
-
-               return res;
-       }
+       pthread_priority_t pp = _pthread_priority_make_from_thread_qos(qos, relpri, 0);
+       return _pthread_set_properties_self(_PTHREAD_SET_SELF_QOS_FLAG, pp, 0);
 }
 
 int
-pthread_set_qos_class_np(pthread_t __pthread,
-                                                qos_class_t __qos_class,
-                                                int __relative_priority)
+pthread_set_qos_class_np(pthread_t thread, qos_class_t qc, int relpri)
 {
-       if (__pthread != pthread_self()) {
+       if (thread != pthread_self()) {
                /* The kext now enforces this anyway, if we check here too, it allows us to call
                 * _pthread_set_properties_self later if we can.
                 */
                return EPERM;
        }
-
-       return pthread_set_qos_class_self_np(__qos_class, __relative_priority);
+       return pthread_set_qos_class_self_np(qc, relpri);
 }
 
 int
-pthread_get_qos_class_np(pthread_t __pthread,
-                                                qos_class_t * __restrict __qos_class,
-                                                int * __restrict __relative_priority)
+pthread_get_qos_class_np(pthread_t thread, qos_class_t *qc, int *relpri)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return ENOTSUP;
-       }
-
-       pthread_priority_t priority;
-
-       if (__pthread == pthread_self()) {
-               priority = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS);
-       } else {
-               priority = __pthread->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS];
-       }
-
-       qos_class_t qos; int relpri;
-       _pthread_priority_split_newest(priority, qos, relpri);
-
-       if (__qos_class) { *__qos_class = qos; }
-       if (__relative_priority) { *__relative_priority = relpri; }
-
+       pthread_priority_t pp = thread->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS];
+       _pthread_priority_split(pp, qc, relpri);
        return 0;
 }
 
 qos_class_t
 qos_class_self(void)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return QOS_CLASS_UNSPECIFIED;
-       }
-
-       pthread_priority_t p = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS);
-       qos_class_t c = _pthread_priority_get_qos_newest(p);
-
-       return c;
+       pthread_priority_t pp;
+       pp = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS);
+       return _pthread_qos_class_from_thread_qos(_pthread_priority_thread_qos(pp));
 }
 
 qos_class_t
 qos_class_main(void)
 {
-       return _pthread_priority_get_qos_newest(_main_qos);
+       pthread_priority_t pp = _main_qos;
+       return _pthread_qos_class_from_thread_qos(_pthread_priority_thread_qos(pp));
 }
 
 pthread_priority_t
-_pthread_qos_class_encode(qos_class_t qos_class, int relative_priority, unsigned long flags)
+_pthread_qos_class_encode(qos_class_t qc, int relpri, unsigned long flags)
 {
-       return _pthread_priority_make_newest(qos_class, relative_priority, flags);
+       thread_qos_t qos = _pthread_qos_class_to_thread_qos(qc);
+       return _pthread_priority_make_from_thread_qos(qos, relpri, flags);
 }
 
 qos_class_t
-_pthread_qos_class_decode(pthread_priority_t priority, int *relative_priority, unsigned long *flags)
+_pthread_qos_class_decode(pthread_priority_t pp, int *relpri, unsigned long *flags)
 {
-       qos_class_t qos; int relpri;
-
-       _pthread_priority_split_newest(priority, qos, relpri);
-
-       if (relative_priority) { *relative_priority = relpri; }
-       if (flags) { *flags = _pthread_priority_get_flags(priority); }
-       return qos;
+       qos_class_t qc;
+       _pthread_priority_split(pp, &qc, relpri);
+       if (flags) *flags = (pp & _PTHREAD_PRIORITY_FLAGS_MASK);
+       return qc;
 }
 
 // Encode a legacy workqueue API priority into a pthread_priority_t. This API
@@ -222,35 +193,48 @@ _pthread_qos_class_decode(pthread_priority_t priority, int *relative_priority, u
 pthread_priority_t
 _pthread_qos_class_encode_workqueue(int queue_priority, unsigned long flags)
 {
+       thread_qos_t qos;
        switch (queue_priority) {
-       case WORKQ_HIGH_PRIOQUEUE:
-               return _pthread_priority_make_newest(QOS_CLASS_USER_INITIATED, 0, flags);
-       case WORKQ_DEFAULT_PRIOQUEUE:
-               return _pthread_priority_make_newest(QOS_CLASS_DEFAULT, 0, flags);
-       case WORKQ_LOW_PRIOQUEUE:
+       case WORKQ_HIGH_PRIOQUEUE:      qos = THREAD_QOS_USER_INTERACTIVE; break;
+       case WORKQ_DEFAULT_PRIOQUEUE:   qos = THREAD_QOS_LEGACY; break;
        case WORKQ_NON_INTERACTIVE_PRIOQUEUE:
-               return _pthread_priority_make_newest(QOS_CLASS_UTILITY, 0, flags);
-       case WORKQ_BG_PRIOQUEUE:
-               return _pthread_priority_make_newest(QOS_CLASS_BACKGROUND, 0, flags);
-       /* Legacy dispatch does not use QOS_CLASS_MAINTENANCE, so no need to handle it here */
+       case WORKQ_LOW_PRIOQUEUE:       qos = THREAD_QOS_UTILITY; break;
+       case WORKQ_BG_PRIOQUEUE:        qos = THREAD_QOS_BACKGROUND; break;
        default:
                __pthread_abort();
        }
+       return _pthread_priority_make_from_thread_qos(qos, 0, flags);
 }
 
+#define _PTHREAD_SET_SELF_OUTSIDE_QOS_SKIP \
+               (_PTHREAD_SET_SELF_QOS_FLAG | _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG | \
+                _PTHREAD_SET_SELF_TIMESHARE_FLAG)
+
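+// Editor's note (not part of this change): this mask groups the three
+// self-set flags that must not be altered on a workloop thread carrying
+// outside-of-QoS properties; _pthread_set_properties_self() strips them from
+// kflags and forwards only whatever remains to the kernel.
+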
 int
-_pthread_set_properties_self(_pthread_set_flags_t flags, pthread_priority_t priority, mach_port_t voucher)
+_pthread_set_properties_self(_pthread_set_flags_t flags,
+               pthread_priority_t priority, mach_port_t voucher)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_SETSELF)) {
-               return ENOTSUP;
+       pthread_t self = pthread_self();
+       _pthread_set_flags_t kflags = flags;
+       int rv = 0;
+
+       if (self->wqoutsideqos && (flags & _PTHREAD_SET_SELF_OUTSIDE_QOS_SKIP)) {
+               // A number of properties cannot be altered if we are a workloop
+               // thread that has outside-of-QoS properties applied to it.
+               kflags &= ~_PTHREAD_SET_SELF_OUTSIDE_QOS_SKIP;
+               if (kflags == 0) goto skip;
        }
 
-       int rv = __bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, priority, voucher, flags);
+       rv = __bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, priority, voucher, kflags);
 
-       /* Set QoS TSD if we succeeded or only failed the voucher half. */
+skip:
+        // Set QoS TSD if we succeeded, or only failed the voucher portion of the
+        // call. Additionally, if we skipped setting QoS because of outside-of-QoS
+        // attributes then we still want to set the TSD in userspace.
        if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
                if (rv == 0 || errno == ENOENT) {
-                       _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority);
+                       _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, 
+                                       priority);
                }
        }
 
@@ -263,37 +247,21 @@ _pthread_set_properties_self(_pthread_set_flags_t flags, pthread_priority_t prio
 int
 pthread_set_fixedpriority_self(void)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return ENOTSUP;
-       }
-
-       if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) {
-               return _pthread_set_properties_self(_PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG, 0, 0);
-       } else {
-               return ENOTSUP;
-       }
+       return _pthread_set_properties_self(_PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG, 0, 0);
 }
 
 int
 pthread_set_timeshare_self(void)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return ENOTSUP;
-       }
-
-       if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) {
-               return _pthread_set_properties_self(_PTHREAD_SET_SELF_TIMESHARE_FLAG, 0, 0);
-       } else {
-               return ENOTSUP;
-       }
+       return _pthread_set_properties_self(_PTHREAD_SET_SELF_TIMESHARE_FLAG, 0, 0);
 }
 
-
 pthread_override_t
-pthread_override_qos_class_start_np(pthread_t __pthread,  qos_class_t __qos_class, int __relative_priority)
+pthread_override_qos_class_start_np(pthread_t thread,  qos_class_t qc, int relpri)
 {
        pthread_override_t rv;
        kern_return_t kr;
+       thread_qos_t qos;
        int res = 0;
 
        /* For now, we don't have access to malloc. So we'll have to vm_allocate this, which means the tiny struct is going
@@ -301,23 +269,30 @@ pthread_override_qos_class_start_np(pthread_t __pthread,  qos_class_t __qos_clas
         */
        bool did_malloc = true;
 
+       qos = _pthread_validate_qos_class_and_relpri(qc, relpri);
+       if (qos == THREAD_QOS_UNSPECIFIED) {
+               return (_Nonnull pthread_override_t)NULL;
+       }
+
        mach_vm_address_t vm_addr = malloc(sizeof(struct pthread_override_s));
        if (!vm_addr) {
                vm_addr = vm_page_size;
                did_malloc = false;
 
-               kr = mach_vm_allocate(mach_task_self(), &vm_addr, round_page(sizeof(struct pthread_override_s)), VM_MAKE_TAG(VM_MEMORY_LIBDISPATCH) | VM_FLAGS_ANYWHERE);
+               kr = mach_vm_allocate(mach_task_self(), &vm_addr,
+                               round_page(sizeof(struct pthread_override_s)),
+                               VM_MAKE_TAG(VM_MEMORY_LIBDISPATCH) | VM_FLAGS_ANYWHERE);
                if (kr != KERN_SUCCESS) {
                        errno = ENOMEM;
-                       return (_Nonnull pthread_override_t) NULL;
+                       return (_Nonnull pthread_override_t)NULL;
                }
        }
 
        rv = (pthread_override_t)vm_addr;
        rv->sig = PTHREAD_OVERRIDE_SIGNATURE;
-       rv->pthread = __pthread;
-       rv->kthread = pthread_mach_thread_np(__pthread);
-       rv->priority = _pthread_priority_make_newest(__qos_class, __relative_priority, 0);
+       rv->pthread = thread;
+       rv->kthread = pthread_mach_thread_np(thread);
+       rv->priority = _pthread_priority_make_from_thread_qos(qos, relpri, 0);
        rv->malloced = did_malloc;
 
        /* To ensure that the kernel port that we keep stays valid, we retain it here. */
@@ -342,7 +317,7 @@ pthread_override_qos_class_start_np(pthread_t __pthread,  qos_class_t __qos_clas
                }
                rv = NULL;
        }
-       return (_Nonnull pthread_override_t) rv;
+       return (_Nonnull pthread_override_t)rv;
 }
 
 int
@@ -523,7 +498,11 @@ _pthread_workqueue_parallelism_for_priority(int qos, unsigned long flags)
 int
 pthread_qos_max_parallelism(qos_class_t qos, unsigned long flags)
 {
-       int thread_qos = _pthread_qos_class_to_thread_qos(qos);
+       thread_qos_t thread_qos;
+       if (qos == QOS_CLASS_UNSPECIFIED) {
+               qos = QOS_CLASS_DEFAULT; // <rdar://problem/35080198>
+       }
+       thread_qos = _pthread_qos_class_to_thread_qos(qos);
        if (thread_qos == THREAD_QOS_UNSPECIFIED) {
                errno = EINVAL;
                return -1;
index 761103eca3caaf15d21ada2a89c43a0ba68ac511..22cc6892466976521722773e52b9b389c45dc257 100644 (file)
  * MkLinux
  */
 
+#include "internal.h"
+
+#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288
 /*
  * Machine specific support for thread initialization
  */
 
-#include "internal.h"
-#include <platform/string.h>
+// NOTE: no resolvers, so this file must not contain any atomic operations
 
+PTHREAD_NOEXPORT void _pthread_setup_suspended(pthread_t th, void (*f)(pthread_t), void *sp);
 
 /*
  * Set up the initial state of a MACH thread
  */
 void
-_pthread_setup(pthread_t thread,
+_pthread_setup_suspended(pthread_t thread,
               void (*routine)(pthread_t),
-              void *vsp,
-              int suspended,
-              int needresume)
+              void *vsp)
 {
 #if defined(__i386__)
        i386_thread_state_t state = { };
@@ -71,20 +72,12 @@ _pthread_setup(pthread_t thread,
        x86_thread_state64_t state = { };
        thread_state_flavor_t flavor = x86_THREAD_STATE64;
        mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
-#elif defined(__arm__)
-       arm_thread_state_t state = { };
-       thread_state_flavor_t flavor = ARM_THREAD_STATE;
-       mach_msg_type_number_t count = ARM_THREAD_STATE_COUNT;
 #else
 #error _pthread_setup not defined for this architecture
 #endif
 
-       if (suspended) {
-               (void)thread_get_state(_pthread_kernel_thread(thread),
-                                    flavor,
-                                    (thread_state_t)&state,
-                                    &count);
-       }
+       (void)thread_get_state(_pthread_kernel_thread(thread),
+                       flavor, (thread_state_t)&state, &count);
 
 #if defined(__i386__)
        uintptr_t *sp = vsp;
@@ -110,46 +103,10 @@ _pthread_setup(pthread_t thread,
        state.__rdi = (uintptr_t)thread;        // argument to function
        *--sp = 0;                              // fake return address
        state.__rsp = (uintptr_t)sp;            // set stack pointer
-#elif defined(__arm__)
-       state.__pc = (uintptr_t)routine;
-
-       // Detect switch to thumb mode.
-       if (state.__pc & 1) {
-           state.__pc &= ~1;
-           state.__cpsr |= 0x20; /* PSR_THUMB */
-       }
-
-       state.__sp = (uintptr_t)vsp - C_ARGSAVE_LEN - C_RED_ZONE;
-       state.__r[0] = (uintptr_t)thread;
 #else
-#error _pthread_setup not defined for this architecture
+#error _pthread_setup_suspended not defined for this architecture
 #endif
 
-       if (suspended) {
-               (void)thread_set_state(_pthread_kernel_thread(thread), flavor, (thread_state_t)&state, count);
-               if (needresume) {
-                       (void)thread_resume(_pthread_kernel_thread(thread));
-               }
-       } else {
-               mach_port_t kernel_thread;
-               (void)thread_create_running(mach_task_self(), flavor, (thread_state_t)&state, count, &kernel_thread);
-               _pthread_set_kernel_thread(thread, kernel_thread);
-       }
-}
-
-// pthread_setup initializes large structures to 0, which the compiler turns into a library call to memset. To avoid linking against
-// Libc, provide a simple wrapper that calls through to the libplatform primitives
-
-#undef memset
-__attribute__((visibility("hidden"))) void *
-memset(void *b, int c, size_t len)
-{
-       return _platform_memset(b, c, len);
-}
-
-#undef bzero
-__attribute__((visibility("hidden"))) void
-bzero(void *s, size_t n)
-{
-       _platform_bzero(s, n);
+       (void)thread_set_state(_pthread_kernel_thread(thread), flavor, (thread_state_t)&state, count);
 }
+#endif // !defined(__OPEN_SOURCE__) && TARGET_OS_OSX
index 408b1010c88423b59f8390a82d019f0de27571f3..84e2717439c7292b53be73eb0518e6020ea781d4 100644 (file)
@@ -12,10 +12,13 @@ include $(DEVELOPER_DIR)/AppleInternal/Makefiles/darwintest/Makefile.common
 TARGETS :=
 TARGETS += atfork
 TARGETS += bsdthread_set_self
+TARGETS += stack
+TARGETS += stack_size
 TARGETS += cond
 #TARGETS += cond_hang3
 #TARGETS += cond_stress
 TARGETS += cond_timed
+TARGETS += cond_prepost
 TARGETS += custom_stack
 TARGETS += stack_aslr
 TARGETS += join
@@ -24,6 +27,7 @@ TARGETS += main_stack_custom
 TARGETS += detach
 #TARGETS += maxwidth
 TARGETS += mutex
+TARGETS += mutex_prepost
 TARGETS += mutex_try
 TARGETS += once_cancel
 TARGETS += pthread_attr_setstacksize
@@ -35,6 +39,7 @@ TARGETS += pthread_introspection
 TARGETS += pthread_setspecific
 TARGETS += pthread_threadid_np
 TARGETS += pthread_get_qos_class_np
+TARGETS += pthread_dependency
 #TARGETS += qos
 TARGETS += rdar_32848402
 #TARGETS += rwlock-22244050
@@ -47,6 +52,7 @@ TARGETS += tsd
 #TARGETS += wq_kevent_stress
 TARGETS += wq_limits
 TARGETS += add_timer_termination
+TARGETS += perf_contended_mutex_rwlock
 
 OTHER_LTE_INCLUDE_FILES += \
        /usr/local/lib/libdarwintest_utils.dylib
@@ -54,7 +60,7 @@ OTHER_LTE_INCLUDE_FILES += \
 OTHER_CFLAGS := -DDARWINTEST -Weverything \
                -Wno-vla -Wno-bad-function-cast -Wno-missing-noreturn \
                -Wno-missing-field-initializers -Wno-format-pedantic \
-               -Wno-gnu-folding-constant
+               -Wno-gnu-folding-constant -Wno-used-but-marked-unused
 OTHER_LDFLAGS := -ldarwintest_utils
 
 #TARGETS += main_stack_legacy // Disabled by default due to linker warnings
@@ -63,8 +69,8 @@ OTHER_LDFLAGS := -ldarwintest_utils
 #main_stack_legacy: ARCH_FLAGS = -arch i386
 #main_stack_legacy: DEPLOYMENT_TARGET_FLAGS = -mmacosx-version-min=10.7
 
-main_stack_custom: OTHER_LDFLAGS += -Wl,-stack_size,0x14000
-main_stack_custom: OTHER_CFLAGS += -DSTACKSIZE=0x14000
+main_stack_custom: OTHER_LDFLAGS += -Wl,-stack_size,0x124000
+main_stack_custom: OTHER_CFLAGS += -DSTACKSIZE=0x124000
 
 bsdthread_set_self: OTHER_CFLAGS += -D_DARWIN_FEATURE_CLOCK_GETTIME
 
diff --git a/tests/cond_prepost.c b/tests/cond_prepost.c
new file mode 100644 (file)
index 0000000..df8b86e
--- /dev/null
@@ -0,0 +1,217 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <TargetConditionals.h>
+
+#include <pthread/pthread_spis.h>
+
+#include <sys/sysctl.h>
+
+#include "darwintest_defaults.h"
+#include <darwintest_multiprocess.h>
+
+// <rdar://problem/38810583> this test case is intended to test for the
+// specific issue found in this radar. That is, if:
+//
+//     1. A mutex is in first-fit policy mode, and
+//     2. is used as the mutex in a pthread_cond_wait (or timedwait), and
+//     3. the mutex has the K-bit set but has no kernel waiters, and
+//     4. the cvwait call preposts an unlock to the mutex
+//
+//  Under these conditions, the fact that the cvwait preposted an unlock to
+//  the paired mutex is lost during the call. The P-bit was never returned to
+//  userspace and the kwq in the kernel would continue to exist. If the same
+//  uaddr is then reused as another synchroniser type, we would often
+//  return EINVAL from the wait/lock function.
+//
+//  So this test is attempting to:
+//
+//     1. Repeatedly bang on a mutex+cvar for a number of iterations in the
+//        hope of triggering a cvwait prepost situation.
+//     2. Then destroy both the mutex and cvar, and reinitialise each memory
+//        location as the opposite type of synchroniser. Then cvwait once to
+//        trigger the failure condition.
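+//
+//  Editor's note (usage, not part of the original test): the two T_DECLs at
+//  the bottom of this file pin the default mutex policy through the
+//  PTHREAD_MUTEX_DEFAULT_POLICY environment variable, which libpthread reads
+//  at process startup, e.g.:
+//
+//      PTHREAD_MUTEX_DEFAULT_POLICY=1 ./cond_prepost   # fairshare
+//      PTHREAD_MUTEX_DEFAULT_POLICY=3 ./cond_prepost   # firstfit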
+
+struct context {
+       union {
+               pthread_mutex_t mutex;
+               pthread_cond_t cond;
+       };
+       union {
+               pthread_mutex_t mutex2;
+               pthread_cond_t cond2;
+       };
+       long value;
+       long count;
+       long waiter;
+};
+
+static void *test_cond(void *ptr) {
+       struct context *context = ptr;
+       int res;
+
+       res = pthread_cond_wait(&context->cond, &context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "condition wait on condvar completed");
+       res = pthread_mutex_unlock(&context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "unlock condvar mutex");
+       return NULL;
+}
+
+static void *test_cond_wake(void *ptr) {
+       struct context *context = ptr;
+       int res;
+
+       res = pthread_mutex_lock(&context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "locked condvar mutex");
+       res = pthread_cond_signal(&context->cond);
+       T_ASSERT_POSIX_ZERO(res, "condvar signalled");
+       res = pthread_mutex_unlock(&context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "dropped condvar mutex");
+
+       return NULL;
+}
+
+static void *test_thread(void *ptr) {
+       int res;
+       long old;
+       struct context *context = ptr;
+
+       int i = 0;
+       char *str;
+
+       do {
+               bool try = i++ & 1;
+               bool cond = i & 16;
+
+               if (!try) {
+                       str = "pthread_mutex_lock";
+                       res = pthread_mutex_lock(&context->mutex);
+               } else {
+                       str = "pthread_mutex_trylock";
+                       res = pthread_mutex_trylock(&context->mutex);
+               }
+               if (res != 0) {
+                       if (try && res == EBUSY) {
+                               continue;
+                       }
+                       T_ASSERT_POSIX_ZERO(res, "[%ld] %s", context->count, str);
+               }
+
+               old = __sync_fetch_and_or(&context->value, 1);
+               if ((old & 1) != 0) {
+                       T_FAIL("[%ld] OR %lx\n", context->count, old);
+               }
+
+               old = __sync_fetch_and_and(&context->value, 0);
+               if ((old & 1) == 0) {
+                       T_FAIL("[%ld] AND %lx\n", context->count, old);
+               }
+
+               if (cond && !context->waiter) {
+                       context->waiter = 1;
+                       struct timespec ts = {
+                               .tv_sec = 0,
+                               .tv_nsec = 10ull * NSEC_PER_MSEC,
+                       };
+
+                       res = pthread_cond_timedwait_relative_np(&context->cond2, &context->mutex, &ts);
+                       if (res == ETIMEDOUT) {
+                               // ignore, should be the last thread out
+                       } else if (res) {
+                               T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_cond_wait",
+                                               context->count);
+                       }
+                       context->waiter = 0;
+                       res = pthread_mutex_unlock(&context->mutex);
+                       if (res) {
+                               T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_mutex_unlock",
+                                               context->count);
+                       }
+               } else {
+                       if (context->waiter) {
+                               res = pthread_cond_broadcast(&context->cond2);
+                               if (res) {
+                                       T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_cond_broadcast",
+                                                       context->count);
+                               }
+                       }
+                       res = pthread_mutex_unlock(&context->mutex);
+                       if (res) {
+                               T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_mutex_unlock",
+                                               context->count);
+                       }
+               }
+       } while (__sync_fetch_and_sub(&context->count, 1) > 0);
+       return NULL;
+}
+
+
+static void
+_test_condvar_prepost_race(void)
+{
+       struct context context = {
+               .mutex = PTHREAD_MUTEX_INITIALIZER,
+               .cond2 = PTHREAD_COND_INITIALIZER,
+               .value = 0,
+               .count = 10000,
+               .waiter = false,
+       };
+       int i;
+       int res;
+       int threads = 8;
+       pthread_t p[threads];
+       for (i = 0; i < threads; ++i) {
+               res = pthread_create(&p[i], NULL, test_thread, &context);
+               T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()");
+       }
+       for (i = 0; i < threads; ++i) {
+               res = pthread_join(p[i], NULL);
+               T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()");
+       }
+
+       T_PASS("initial pthread mutex storm completed");
+
+       pthread_mutex_destroy(&context.mutex);
+       pthread_cond_destroy(&context.cond2);
+
+       pthread_mutex_init(&context.mutex2, NULL);
+       pthread_cond_init(&context.cond, NULL);
+       res = pthread_mutex_lock(&context.mutex2);
+       T_ASSERT_POSIX_ZERO(res, "mutex lock for condition wait");
+       res = pthread_create(&p[0], NULL, test_cond, &context);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()");
+       res = pthread_create(&p[1], NULL, test_cond_wake, &context);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()");
+
+       res = pthread_join(p[0], NULL);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()");
+       res = pthread_join(p[1], NULL);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()");
+
+       pthread_cond_destroy(&context.cond);
+}
+
+T_DECL(cond_prepost_fairshare, "cond_prepost_fairshare (fairshare)",
+       T_META_ALL_VALID_ARCHS(YES),
+       T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=1"))
+{
+       int i;
+       int count = 100;
+       for (i=0; i < count; i++) {
+               _test_condvar_prepost_race();
+       }
+}
+
+T_DECL(cond_prepost_firstfit, "cond_prepost_firstfit (firstfit)",
+       T_META_ALL_VALID_ARCHS(YES),
+       T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=3"))
+{
+       int i;
+       int count = 100;
+       for (i=0; i < count; i++) {
+               _test_condvar_prepost_race();
+       }
+}
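
For context: pthread_cond_timedwait_relative_np(), used by the storm threads above, is the Darwin extension that takes a timeout relative to "now" rather than an absolute deadline. A minimal standalone sketch of that wait pattern (not part of the diff):

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t c = PTHREAD_COND_INITIALIZER;

/* Wait up to 10ms for a signal; returns 1 on timeout, 0 on wakeup. */
static int
wait_briefly(void)
{
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 10 * 1000 * 1000 };
	pthread_mutex_lock(&m);
	int res = pthread_cond_timedwait_relative_np(&c, &m, &ts);
	pthread_mutex_unlock(&m);
	return res == ETIMEDOUT;
}
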
index eb0d660114fe8806fc4015f86027de420afdabcd..2e992a8874c18a4d8aa11c1a7770727e043a690d 100644 (file)
@@ -14,7 +14,7 @@ T_DECL(main_stack_custom, "tests the reported values for a custom main thread st
 
        struct rlimit lim;
        T_QUIET; T_ASSERT_POSIX_SUCCESS(getrlimit(RLIMIT_STACK, &lim), NULL);
-       lim.rlim_cur = lim.rlim_cur / 8;
+       lim.rlim_cur = lim.rlim_cur + 32 * PAGE_SIZE;
        T_EXPECT_EQ(setrlimit(RLIMIT_STACK, &lim), -1, "setrlimit for stack should fail with custom stack");
        T_EXPECT_EQ((size_t)STACKSIZE, pthread_get_stacksize_np(pthread_self()), "reported stacksize shouldn't change");
 }
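
The reworked assertion grows the soft limit instead of shrinking it; either way, setrlimit(RLIMIT_STACK) is expected to fail once the main thread runs on a caller-supplied stack. A sketch of the probe (using getpagesize() in place of PAGE_SIZE to stay self-contained):

#include <sys/resource.h>
#include <unistd.h>

/* Returns 0 if the kernel accepted the new limit; with a custom
 * main-thread stack this is expected to return -1 instead. */
static int
try_raise_stack_limit(void)
{
	struct rlimit lim;
	if (getrlimit(RLIMIT_STACK, &lim) != 0)
		return -1;
	lim.rlim_cur += 32 * (rlim_t)getpagesize();
	return setrlimit(RLIMIT_STACK, &lim);
}
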
index 0b1e1d47444e47d3c380b10466cc9f34b7f93e24..9fe02774e988bf6941aed668aa033fc72e141a75 100644 (file)
@@ -4,6 +4,7 @@
 #include <unistd.h>
 #include <stdbool.h>
 #include <errno.h>
+#include <TargetConditionals.h>
 
 #include <pthread/pthread_spis.h>
 
@@ -104,7 +105,7 @@ check_process_default_mutex_policy(int expected_policy)
 T_DECL(mutex_default_policy,
                "Tests that the default mutex policy is fairshare")
 {
-       check_process_default_mutex_policy(_PTHREAD_MUTEX_POLICY_FAIRSHARE);
+       check_process_default_mutex_policy(_PTHREAD_MUTEX_POLICY_FIRSTFIT);
 }
 
 T_DECL(mutex_default_policy_sysctl,
@@ -133,7 +134,7 @@ T_HELPER_DECL(mutex_default_policy_sysctl_helper, "sysctl helper")
 
 T_DECL(mutex_default_policy_envvar,
                "Tests that setting the policy environment variable changes the default policy",
-               T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=2"))
+               T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=3"))
 {
        check_process_default_mutex_policy(_PTHREAD_MUTEX_POLICY_FIRSTFIT);
 }
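
Besides the process-wide default probed here (selectable with the PTHREAD_MUTEX_DEFAULT_POLICY environment variable), a policy can be chosen per mutex via the pthread_mutexattr_setpolicy_np() SPI from <pthread/pthread_spis.h>, as the perf test below also does. A sketch:

#include <pthread.h>
#include <pthread/pthread_spis.h>

/* Initialize *m with an explicit policy (e.g. _PTHREAD_MUTEX_POLICY_FIRSTFIT
 * or _PTHREAD_MUTEX_POLICY_FAIRSHARE), overriding the process default. */
static int
mutex_init_with_policy(pthread_mutex_t *m, int policy)
{
	pthread_mutexattr_t attr;
	int r = pthread_mutexattr_init(&attr);
	if (r != 0)
		return r;
	r = pthread_mutexattr_setpolicy_np(&attr, policy);
	if (r == 0)
		r = pthread_mutex_init(m, &attr);
	pthread_mutexattr_destroy(&attr);
	return r;
}
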
diff --git a/tests/mutex_prepost.c b/tests/mutex_prepost.c
new file mode 100644 (file)
index 0000000..6423e20
--- /dev/null
@@ -0,0 +1,157 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <TargetConditionals.h>
+
+#include <pthread/pthread_spis.h>
+
+#include <sys/sysctl.h>
+
+#include "darwintest_defaults.h"
+#include <darwintest_multiprocess.h>
+
+struct context {
+       union {
+               pthread_mutex_t mutex;
+               pthread_cond_t cond;
+       };
+       pthread_mutex_t mutex2;
+       long value;
+       long count;
+};
+
+static void *test_cond(void *ptr) {
+       struct context *context = ptr;
+       int res;
+       
+       res = pthread_cond_wait(&context->cond, &context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "condition wait on condvar completed");
+       res = pthread_mutex_unlock(&context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "unlock condvar mutex");
+       return NULL;
+}
+
+static void *test_cond_wake(void *ptr) {
+       struct context *context = ptr;
+       int res;
+       
+       res = pthread_mutex_lock(&context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "locked condvar mutex");
+       res = pthread_cond_signal(&context->cond);
+       T_ASSERT_POSIX_ZERO(res, "condvar signalled");
+       res = pthread_mutex_unlock(&context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "dropped condvar mutex");
+
+       return NULL;
+}
+
+static void *test_thread(void *ptr) {
+       int res;
+       long old;
+       struct context *context = ptr;
+
+       int i = 0;
+       char *str;
+
+       do {
+               bool try = i++ & 1;
+
+               if (!try){
+                       str = "pthread_mutex_lock";
+                       res = pthread_mutex_lock(&context->mutex);
+               } else {
+                       str = "pthread_mutex_trylock";
+                       res = pthread_mutex_trylock(&context->mutex);
+               }
+               if (res != 0) {
+                       if (try && res == EBUSY) {
+                               continue;
+                       }
+                       T_ASSERT_POSIX_ZERO(res, "[%ld] %s", context->count, str);
+               }
+               
+               old = __sync_fetch_and_or(&context->value, 1);
+               if ((old & 1) != 0) {
+                       T_FAIL("[%ld] OR %lx\n", context->count, old);
+               }
+
+               old = __sync_fetch_and_and(&context->value, 0);
+               if ((old & 1) == 0) {
+                       T_FAIL("[%ld] AND %lx\n", context->count, old);
+               }
+       
+               res = pthread_mutex_unlock(&context->mutex);
+               if (res) {
+                       T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_mutex_unlock", context->count);
+               }
+       } while (__sync_fetch_and_sub(&context->count, 1) > 0);
+       return NULL;
+}
+
+
+static void
+_test_condvar_prepost_race(void)
+{
+       struct context context = {
+               .mutex = PTHREAD_MUTEX_INITIALIZER,
+               .mutex2 = PTHREAD_MUTEX_INITIALIZER,
+               .value = 0,
+               .count = 1000,
+       };
+       int i;
+       int res;
+       int threads = 8;
+       pthread_t p[threads];
+       for (i = 0; i < threads; ++i) {
+               res = pthread_create(&p[i], NULL, test_thread, &context);
+               T_ASSERT_POSIX_ZERO(res, "pthread_create()");
+       }
+       for (i = 0; i < threads; ++i) {
+               res = pthread_join(p[i], NULL);
+               T_ASSERT_POSIX_ZERO(res, "pthread_join()");
+       }
+
+       T_PASS("initial pthread mutex storm completed");
+
+       pthread_mutex_destroy(&context.mutex);
+
+       pthread_cond_init(&context.cond, NULL);
+       res = pthread_mutex_lock(&context.mutex2);
+       T_ASSERT_POSIX_ZERO(res, "mutex lock for condition wait");
+       res = pthread_create(&p[0], NULL, test_cond, &context);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()");
+       res = pthread_create(&p[1], NULL, test_cond_wake, &context);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()");
+
+       res = pthread_join(p[0], NULL);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()");
+       res = pthread_join(p[1], NULL);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()");
+
+       pthread_cond_destroy(&context.cond);
+}
+
+T_DECL(mutex_prepost_fairshare, "pthread_mutex_prepost (fairshare)",
+       T_META_ALL_VALID_ARCHS(YES),
+       T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=1"))
+{
+       int i;
+       int count = 100;
+       for (i=0; i < count; i++) {
+               _test_condvar_prepost_race();
+       }
+}
+
+T_DECL(mutex_prepost_firstfit, "pthread_mutex_prepost (firstfit)",
+       T_META_ALL_VALID_ARCHS(YES),
+       T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=3"))
+{
+       int i;
+       int count = 100;
+       for (i=0; i < count; i++) {
+               _test_condvar_prepost_race();
+       }
+}
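
The invariant behind the OR/AND checks in test_thread() deserves a word: with the mutex held, setting bit 0 of `value` must observe it previously clear, and clearing it must observe it previously set; any other outcome means two threads were inside the critical section at once. Distilled:

#include <assert.h>

static long value;	/* protected by the mutex under test */

/* Must be called with the mutex held; trips an assert if mutual
 * exclusion was violated. */
static void
assert_exclusive(void)
{
	long old = __sync_fetch_and_or(&value, 1);
	assert((old & 1) == 0);	/* another thread already set the bit */
	old = __sync_fetch_and_and(&value, 0);
	assert((old & 1) == 1);	/* our own bit must still be present */
}
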
diff --git a/tests/perf_contended_mutex_rwlock.c b/tests/perf_contended_mutex_rwlock.c
new file mode 100644 (file)
index 0000000..e4219c5
--- /dev/null
@@ -0,0 +1,519 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdatomic.h>
+#include <math.h>
+#include <unistd.h>
+#include <sys/sysctl.h>
+#include <mach/mach.h>
+#include <pthread.h>
+#include <pthread/pthread_spis.h>
+#include <os/lock.h>
+#include <darwintest.h>
+
+// number of times the lock is taken per dt_stat batch
+#define ITERATIONS_PER_DT_STAT_BATCH 10000ull
+// number of times the contended mutex is taken per dt_stat batch
+#define ITERATIONS_PER_DT_STAT_BATCH_CONTENDED_MUTEX 1000ull
+// shift determining power of 2 factor of time spent by worker threads in the
+// busy() function while outside of the lock vs inside the lock
+#define OUTER_VS_INNER_SHIFT 4
+// fraction of read lock vs write lock acquires
+#define RDLOCK_FRACTION 0.99f
+// maintain and print progress counters in between measurement batches
+#define COUNTERS 0
+
+// move the darwintest assertion code out of the straight line execution path
+// since it is has non-trivial overhead and codegen impact even if the assertion
+// is never triggered.
+#define iferr(_e) if(__builtin_expect(!!(_e), 0))
+
+#pragma mark -
+
+uint64_t
+random_busy_counts(unsigned int *seed, uint64_t *inner, uint64_t *outer)
+{
+       uint64_t random = rand_r(seed);
+       const uint64_t of = (1 << OUTER_VS_INNER_SHIFT);
+       *inner = 0x4 + (random & (0x10 - 1));
+       *outer = 0x4 * of + ((random >> 4) & (0x10 * of - 1));
+       return random;
+}
+
+// By default busy() does cpu busy work for a passed in number of iterations
+enum {
+       busy_is_nothing = 0,
+       busy_is_cpu_busy,
+       busy_is_cpu_yield,
+};
+static int busy_select = busy_is_cpu_busy;
+
+static double
+cpu_busy(uint64_t n)
+{
+       double d = M_PI;
+       uint64_t i;
+       for (i = 0; i < n; i++) d *= M_PI;
+       return d;
+}
+
+static double
+cpu_yield(uint64_t n)
+{
+       uint64_t i;
+       for (i = 0; i < n; i++) {
+#if defined(__arm__) || defined(__arm64__)
+       asm volatile("yield");
+#elif defined(__x86_64__) || defined(__i386__)
+       asm volatile("pause");
+#else
+#error Unrecognized architecture
+#endif
+       }
+       return 0;
+}
+
+__attribute__((noinline))
+static double
+busy(uint64_t n)
+{
+       switch(busy_select) {
+       case busy_is_cpu_busy:
+               return cpu_busy(n);
+       case busy_is_cpu_yield:
+               return cpu_yield(n);
+       default:
+               return 0;
+       }
+}
+
+#pragma mark -
+
+static semaphore_t ready_sem, start_sem, end_sem;
+static uint32_t nthreads;
+static _Atomic uint32_t active_thr;
+static _Atomic int64_t todo;
+uint64_t iterations_per_dt_stat_batch = ITERATIONS_PER_DT_STAT_BATCH;
+
+#if COUNTERS
+static _Atomic uint64_t total_locks, total_rdlocks, total_wrlocks;
+#define ctr_inc(_t) atomic_fetch_add_explicit(&(_t), 1, memory_order_relaxed)
+#else
+#define ctr_inc(_t)
+#endif
+
+static uint32_t
+ncpu(void)
+{
+       static uint32_t activecpu, physicalcpu;
+       if (!activecpu) {
+               uint32_t n;
+               size_t s = sizeof(n);
+               sysctlbyname("hw.activecpu", &n, &s, NULL, 0);
+               activecpu = n;
+               s = sizeof(n);
+               sysctlbyname("hw.physicalcpu", &n, &s, NULL, 0);
+               physicalcpu = n;
+       }
+       return MIN(activecpu, physicalcpu);
+}
+
+__attribute__((noinline))
+static void
+threaded_bench(dt_stat_time_t s, int batch_size)
+{
+       kern_return_t kr;
+       for (int i = 0; i < nthreads; i++) {
+               kr = semaphore_wait(ready_sem);
+               iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");}
+       }
+       atomic_init(&active_thr, nthreads);
+       atomic_init(&todo, batch_size * iterations_per_dt_stat_batch);
+       dt_stat_token t = dt_stat_begin(s);
+       kr = semaphore_signal_all(start_sem);
+       iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal_all");}
+       kr = semaphore_wait(end_sem);
+       iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");}
+       dt_stat_end_batch(s, batch_size, t);
+}
+
+static void
+setup_threaded_bench(void* (*thread_fn)(void*), bool singlethreaded)
+{
+       kern_return_t kr;
+       int r;
+       char *e;
+
+       if (singlethreaded) {
+               nthreads = 1;
+       } else {
+               if ((e = getenv("DT_STAT_NTHREADS"))) nthreads = strtoul(e, NULL, 0);
+               if (nthreads < 2) nthreads = ncpu();
+       }
+       if ((e = getenv("DT_STAT_CPU_BUSY"))) busy_select = strtoul(e, NULL, 0);
+
+       kr = semaphore_create(mach_task_self(), &ready_sem, SYNC_POLICY_FIFO, 0);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
+       kr = semaphore_create(mach_task_self(), &start_sem, SYNC_POLICY_FIFO, 0);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
+       kr = semaphore_create(mach_task_self(), &end_sem, SYNC_POLICY_FIFO, 0);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
+
+       pthread_attr_t attr;
+       r = pthread_attr_init(&attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_attr_init");
+       r = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_attr_setdetachstate");
+
+       for (int i = 0; i < nthreads; i++) {
+               pthread_t th;
+               r = pthread_create(&th, &attr, thread_fn, (void *)(uintptr_t)(i+1));
+               T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_create");
+       }
+}
+
+#pragma mark -
+
+static pthread_mutex_t mutex;
+
+static void *
+mutex_bench_thread(void * arg)
+{
+       kern_return_t kr;
+       int r;
+       unsigned int seed;
+       volatile double dummy;
+
+restart:
+       seed = (uintptr_t)arg; // each thread repeats its own sequence
+       kr = semaphore_wait_signal(start_sem, ready_sem);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+
+       while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
+               uint64_t inner, outer;
+               random_busy_counts(&seed, &inner, &outer);
+               dummy = busy(outer);
+               r = pthread_mutex_lock(&mutex);
+               iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_lock");}
+               dummy = busy(inner);
+               r = pthread_mutex_unlock(&mutex);
+               iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_unlock");}
+               ctr_inc(total_locks);
+       }
+
+       if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) {
+               kr = semaphore_signal(end_sem);
+               T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal");
+       }
+       goto restart;
+}
+
+static void
+mutex_bench(bool singlethreaded)
+{
+       int r;
+       int batch_size;
+#if COUNTERS
+       uint64_t batch = 0;
+#endif
+
+       setup_threaded_bench(mutex_bench_thread, singlethreaded);
+
+       pthread_mutexattr_t attr;
+       r = pthread_mutexattr_init(&attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_init");
+       r = pthread_mutexattr_setpolicy_np(&attr, _PTHREAD_MUTEX_POLICY_FAIRSHARE);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_setpolicy_np");
+       r = pthread_mutex_init(&mutex, &attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_init");
+
+       dt_stat_time_t s = dt_stat_time_create("%llu pthread_mutex_lock & "
+                       "pthread_mutex_unlock (fairshare) on %u thread%s",
+                       iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? "s" : "");
+       do {
+               batch_size = dt_stat_batch_size(s);
+               threaded_bench(s, batch_size);
+#if COUNTERS
+               fprintf(stderr, "\rbatch: %4llu\t size: %4d\tmutexes: %8llu",
+                               ++batch, batch_size,
+                               atomic_load_explicit(&total_locks, memory_order_relaxed));
+#endif
+       } while (!dt_stat_stable(s));
+#if COUNTERS
+       fprintf(stderr, "\n");
+#endif
+       dt_stat_finalize(s);
+}
+
+T_DECL(perf_uncontended_mutex_bench, "Uncontended fairshare mutex",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       mutex_bench(true);
+}
+
+T_DECL(perf_contended_mutex_bench, "Contended fairshare mutex",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       iterations_per_dt_stat_batch = ITERATIONS_PER_DT_STAT_BATCH_CONTENDED_MUTEX;
+       mutex_bench(false);
+}
+
+#pragma mark -
+
+static pthread_rwlock_t rwlock;
+
+static void *
+rwlock_bench_thread(void * arg)
+{
+       kern_return_t kr;
+       int r;
+       unsigned int seed;
+       volatile double dummy;
+       const uint64_t rand_rdlock_max = (double)RAND_MAX * RDLOCK_FRACTION;
+
+restart:
+       seed = (uintptr_t)arg; // each thread repeats its own sequence
+       kr = semaphore_wait_signal(start_sem, ready_sem);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+
+       while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
+               uint64_t inner, outer;
+               uint64_t random = random_busy_counts(&seed, &inner, &outer);
+               dummy = busy(outer);
+               if (random < rand_rdlock_max) {
+                       r = pthread_rwlock_rdlock(&rwlock);
+                       iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_rdlock");}
+                       dummy = busy(inner);
+                       r = pthread_rwlock_unlock(&rwlock);
+                       iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_unlock");}
+                       ctr_inc(total_rdlocks);
+               } else {
+                       r = pthread_rwlock_wrlock(&rwlock);
+                       iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_wrlock");}
+                       dummy = busy(inner);
+                       r = pthread_rwlock_unlock(&rwlock);
+                       iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_unlock");}
+                       ctr_inc(total_wrlocks);
+               }
+               ctr_inc(total_locks);
+       }
+
+       if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) {
+               kr = semaphore_signal(end_sem);
+               T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal");
+       }
+       goto restart;
+}
+
+static void
+rwlock_bench(bool singlethreaded)
+{
+       int r;
+       int batch_size;
+#if COUNTERS
+       uint64_t batch = 0;
+#endif
+
+       setup_threaded_bench(rwlock_bench_thread, singlethreaded);
+
+       r = pthread_rwlock_init(&rwlock, NULL);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_init");
+
+       dt_stat_time_t s = dt_stat_time_create("%llu pthread_rwlock_rd/wrlock & "
+                       "pthread_rwlock_unlock (%.0f%% rdlock) on %u thread%s",
+                       iterations_per_dt_stat_batch, RDLOCK_FRACTION * 100, nthreads,
+                       nthreads > 1 ? "s" : "");
+       do {
+               batch_size = dt_stat_batch_size(s);
+               threaded_bench(s, batch_size);
+#if COUNTERS
+               fprintf(stderr, "\rbatch: %4llu\t size: %4d\trwlocks: %8llu\t"
+                               "rd: %8llu\twr: %8llu", ++batch, batch_size,
+                               atomic_load_explicit(&total_locks,   memory_order_relaxed),
+                               atomic_load_explicit(&total_rdlocks, memory_order_relaxed),
+                               atomic_load_explicit(&total_wrlocks, memory_order_relaxed));
+#endif
+       } while (!dt_stat_stable(s));
+#if COUNTERS
+       fprintf(stderr, "\n");
+#endif
+       dt_stat_finalize(s);
+}
+
+T_DECL(perf_uncontended_rwlock_bench, "Uncontended rwlock",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       rwlock_bench(true);
+}
+
+T_DECL(perf_contended_rwlock_bench, "Contended rwlock",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       rwlock_bench(false);
+}
+
+#pragma mark -
+
+static os_unfair_lock unfair_lock;
+
+static void *
+unfair_lock_bench_thread(void * arg)
+{
+       kern_return_t kr;
+       unsigned int seed;
+       volatile double dummy;
+
+restart:
+       seed = (uintptr_t)arg; // each thread repeats its own sequence
+       kr = semaphore_wait_signal(start_sem, ready_sem);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+
+       while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
+               uint64_t inner, outer;
+               random_busy_counts(&seed, &inner, &outer);
+               dummy = busy(outer);
+               os_unfair_lock_lock(&unfair_lock);
+               dummy = busy(inner);
+               os_unfair_lock_unlock(&unfair_lock);
+               ctr_inc(total_locks);
+       }
+
+       if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) {
+               kr = semaphore_signal(end_sem);
+               T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal");
+       }
+       goto restart;
+}
+
+static void
+unfair_lock_bench(bool singlethreaded)
+{
+       int r;
+       int batch_size;
+#if COUNTERS
+       uint64_t batch = 0;
+#endif
+
+       setup_threaded_bench(unfair_lock_bench_thread, singlethreaded);
+
+       dt_stat_time_t s = dt_stat_time_create("%llu os_unfair_lock_lock & "
+                       "os_unfair_lock_unlock on %u thread%s",
+                       iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? "s" : "");
+       do {
+               batch_size = dt_stat_batch_size(s);
+               threaded_bench(s, batch_size);
+#if COUNTERS
+               fprintf(stderr, "\rbatch: %4llu\t size: %4d\tunfair_locks: %8llu",
+                               ++batch, batch_size,
+                               atomic_load_explicit(&total_locks, memory_order_relaxed));
+#endif
+       } while (!dt_stat_stable(s));
+#if COUNTERS
+       fprintf(stderr, "\n");
+#endif
+       dt_stat_finalize(s);
+}
+
+T_DECL(perf_uncontended_unfair_lock_bench, "Uncontended unfair lock",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       unfair_lock_bench(true);
+}
+
+T_DECL(perf_contended_unfair_lock_bench, "Contended unfair lock",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       unfair_lock_bench(false);
+}
+
+#pragma mark -
+
+static pthread_mutex_t ffmutex;
+
+static void *
+ffmutex_bench_thread(void * arg)
+{
+       kern_return_t kr;
+       int r;
+       unsigned int seed;
+       volatile double dummy;
+
+restart:
+       seed = (uintptr_t)arg; // each thread repeats its own sequence
+       kr = semaphore_wait_signal(start_sem, ready_sem);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+
+       while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
+               uint64_t inner, outer;
+               random_busy_counts(&seed, &inner, &outer);
+               dummy = busy(outer);
+               r = pthread_mutex_lock(&ffmutex);
+               iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_lock");}
+               dummy = busy(inner);
+               r = pthread_mutex_unlock(&ffmutex);
+               iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_unlock");}
+               ctr_inc(total_locks);
+       }
+
+       if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) {
+               kr = semaphore_signal(end_sem);
+               T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal");
+       }
+       goto restart;
+}
+
+static void
+ffmutex_bench(bool singlethreaded)
+{
+       int r;
+       int batch_size;
+#if COUNTERS
+       uint64_t batch = 0;
+#endif
+
+       setup_threaded_bench(ffmutex_bench_thread, singlethreaded);
+
+       pthread_mutexattr_t attr;
+       r = pthread_mutexattr_init(&attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_init");
+       r = pthread_mutexattr_setpolicy_np(&attr, _PTHREAD_MUTEX_POLICY_FIRSTFIT);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_setpolicy_np");
+       r = pthread_mutex_init(&ffmutex, &attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_init");
+
+       dt_stat_time_t s = dt_stat_time_create("%llu pthread_mutex_lock & "
+                       "pthread_mutex_unlock (first-fit) on %u thread%s",
+                       iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? "s" : "");
+       do {
+               batch_size = dt_stat_batch_size(s);
+               threaded_bench(s, batch_size);
+#if COUNTERS
+               fprintf(stderr, "\rbatch: %4llu\t size: %4d\tffmutexes: %8llu",
+                               ++batch, batch_size,
+                               atomic_load_explicit(&total_locks, memory_order_relaxed));
+#endif
+       } while (!dt_stat_stable(s));
+#if COUNTERS
+       fprintf(stderr, "\n");
+#endif
+       dt_stat_finalize(s);
+}
+
+T_DECL(perf_uncontended_ffmutex_bench, "Uncontended first-fit mutex",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       ffmutex_bench(true);
+}
+
+T_DECL(perf_contended_ffmutex_bench, "Contended first-fit mutex",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       ffmutex_bench(false);
+}
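
For a concrete feel of the benchmark's duty cycle: with OUTER_VS_INNER_SHIFT == 4, random_busy_counts() draws inner from [0x4, 0x13] and outer from [0x40, 0x13f], i.e. roughly 16x more busy-work outside the lock than inside it. The same arithmetic, standalone:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	unsigned int seed = 1;
	uint64_t r = rand_r(&seed);
	uint64_t inner = 0x4 + (r & (0x10 - 1));			/* 4..19 iterations inside the lock */
	uint64_t outer = 0x4 * 16 + ((r >> 4) & (0x10 * 16 - 1));	/* 64..319 iterations outside */
	printf("inner=%llu outer=%llu\n",
			(unsigned long long)inner, (unsigned long long)outer);
	return 0;
}
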
diff --git a/tests/pthread_dependency.c b/tests/pthread_dependency.c
new file mode 100644 (file)
index 0000000..a6fd316
--- /dev/null
@@ -0,0 +1,78 @@
+#include "darwintest_defaults.h"
+#include <darwintest_utils.h>
+#include <pthread/dependency_private.h>
+
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+
+static struct job {
+       pthread_dependency_t *req;
+       useconds_t usleep;
+       int done;
+} job;
+
+static void *
+do_test(void *__unused arg)
+{
+       pthread_mutex_lock(&mutex);
+
+       while (!job.done) {
+               while (job.req == 0) {
+                       pthread_cond_wait(&cond, &mutex);
+               }
+               if (job.usleep) usleep(job.usleep);
+               pthread_dependency_fulfill_np(job.req, job.req);
+               job.req = NULL;
+       }
+
+       pthread_mutex_unlock(&mutex);
+       return NULL;
+}
+
+static void
+post_req(pthread_dependency_t *req, useconds_t delay, bool done)
+{
+       pthread_mutex_lock(&mutex);
+       job.req = req;
+       job.usleep = delay;
+       job.done = done;
+       pthread_cond_signal(&cond);
+       pthread_mutex_unlock(&mutex);
+}
+
+T_DECL(dependency, "dependency", T_META_ALL_VALID_ARCHS(YES))
+{
+       pthread_dependency_t req;
+       pthread_t pth;
+       void *v;
+       int ret;
+
+       T_ASSERT_POSIX_ZERO(pthread_create(&pth, NULL, do_test, NULL), NULL);
+
+       T_LOG("Waiting on a pdependency that takes some time");
+
+       pthread_dependency_init_np(&req, pth, NULL);
+       post_req(&req, 100000, false);
+       v = pthread_dependency_wait_np(&req);
+       T_EXPECT_EQ(v, &req, "pthread_dependency_wait worked");
+
+       T_LOG("Waiting on a pdependency that is already fulfilled");
+
+       pthread_dependency_init_np(&req, pth, NULL);
+       post_req(&req, 0, false);
+       usleep(100000);
+       v = pthread_dependency_wait_np(&req);
+       T_EXPECT_EQ(v, &req, "pthread_dependency_wait worked");
+
+       T_LOG("Waiting on a fulfilled pdependency with the other thread exiting");
+
+       pthread_dependency_init_np(&req, pth, NULL);
+       post_req(&req, 0, true);
+       ret = pthread_join(pth, NULL);
+       T_EXPECT_POSIX_ZERO(ret, "pthread_join");
+
+       v = pthread_dependency_wait_np(&req);
+       T_EXPECT_EQ(v, &req, "pthread_dependency_wait worked");
+
+       T_END;
+}
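
A distilled sketch of the one-shot pattern this test exercises, using only the calls seen above (pthread_dependency_init_np() names the thread expected to fulfill, pthread_dependency_fulfill_np() hands over a value exactly once, pthread_dependency_wait_np() returns it). Initializing before the waiter can run avoids the handshake mutex the test needs for its repeated rounds:

#include <pthread.h>
#include <pthread/dependency_private.h>

static pthread_dependency_t dep;

static void *
waiter(void *arg)
{
	(void)arg;
	/* Blocks until the declared owner fulfills the dependency. */
	return pthread_dependency_wait_np(&dep);
}

int
main(void)
{
	pthread_t pth;
	void *v;

	/* The main thread is the declared fulfiller. */
	pthread_dependency_init_np(&dep, pthread_self(), NULL);
	pthread_create(&pth, NULL, waiter, NULL);
	pthread_dependency_fulfill_np(&dep, &dep);	/* one-shot handoff */
	pthread_join(pth, &v);
	return v == &dep ? 0 : 1;
}
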
index d28ca65e7e52d6257ad704046b6c886b7aa2a90c..19cfc254f5f78c8d0df6a4cb72c79f8ea5bab71a 100644 (file)
@@ -9,7 +9,7 @@ extern __uint64_t __thread_selfid( void );
 static void *do_test(void * __unused arg)
 {
        uint64_t threadid = __thread_selfid();
-       T_ASSERT_NOTNULL(threadid, NULL);
+       T_ASSERT_NE(threadid, (uint64_t)0, "__thread_selfid()");
 
        uint64_t pth_threadid = 0;
        T_ASSERT_POSIX_ZERO(pthread_threadid_np(NULL, &pth_threadid), NULL);
index 65cd56e5395b34dc455b18a7922ecf21dbc4bdb5..068836a7695e96276a635eefd5959d322f28112e 100644 (file)
@@ -72,7 +72,9 @@ T_DECL(thread_request_32848402, "repro for rdar://32848402")
        end_spin = clock_gettime_nsec_np(CLOCK_MONOTONIC) + 2 * NSEC_PER_SEC;
 
        dispatch_async_f(a, (void *)0, spin_and_pause);
-       for (long i = 1; i < get_ncpu(); i++) {
+       long n_threads = MIN((long)get_ncpu(),
+                       pthread_qos_max_parallelism(QOS_CLASS_BACKGROUND, 0));
+       for (long i = 1; i < n_threads; i++) {
                dispatch_async_f(b, (void *)i, spin);
        }
 
diff --git a/tests/stack.c b/tests/stack.c
new file mode 100644 (file)
index 0000000..f910b28
--- /dev/null
@@ -0,0 +1,82 @@
+#include <signal.h>
+#include <pthread/stack_np.h>
+
+#include "darwintest_defaults.h"
+#include <darwintest_utils.h>
+
+#if defined(__arm64__)
+#define call_chkstk(value) \
+               __asm__ volatile("orr x9, xzr, %0\t\n" \
+                               "bl _thread_chkstk_darwin" : : "i"(value) : "x9")
+#define TRAPSIG SIGTRAP
+#elif defined(__x86_64__)
+#define call_chkstk(value) \
+               __asm__ volatile("movq %0, %%rax\t\n" \
+                               "callq _thread_chkstk_darwin" : : "i"(value) : "rax")
+#define TRAPSIG SIGILL
+#elif defined(__i386__)
+#define call_chkstk(value) \
+               __asm__ volatile("movl %0, %%eax\t\n" \
+                               "calll _thread_chkstk_darwin" : : "i"(value) : "eax")
+#define TRAPSIG SIGILL
+#endif
+
+static void
+got_signal(int signo __unused)
+{
+       T_PASS("calling with 1 << 24 crashed");
+       T_END;
+}
+
+T_DECL(chkstk, "chkstk",
+               T_META_ALL_VALID_ARCHS(YES), T_META_CHECK_LEAKS(NO))
+{
+#if defined(__arm__)
+       T_SKIP("not on armv7");
+#else
+
+       call_chkstk(1 << 8);
+       T_PASS("calling with 1 << 8");
+
+       call_chkstk(1 << 16);
+       T_PASS("calling with 1 << 16");
+
+       signal(TRAPSIG, got_signal);
+
+       call_chkstk(1 << 24);
+       T_FAIL("should have crashed");
+#endif
+}
+
+struct frame {
+       uintptr_t frame;
+       uintptr_t ret;
+};
+
+OS_NOINLINE OS_NOT_TAIL_CALLED
+static void
+do_stack_frame_decode_test(struct frame frames[], size_t n, size_t count)
+{
+       if (n < count) {
+               frames[n].frame = (uintptr_t)__builtin_frame_address(1);
+               frames[n].ret = (uintptr_t)__builtin_return_address(0);
+               do_stack_frame_decode_test(frames, n + 1, count);
+       } else {
+               uintptr_t frame = (uintptr_t)__builtin_frame_address(1);
+               uintptr_t ret;
+               while (count-- > 0) {
+                       frame = pthread_stack_frame_decode_np(frame, &ret);
+                       T_EXPECT_EQ(frames[count].frame, frame, "Frame %zd", count);
+                       T_EXPECT_EQ(frames[count].ret, ret, "Retaddr %zd", count);
+               }
+       }
+}
+
+T_DECL(pthread_stack_frame_decode_np, "pthread_stack_frame_decode_np",
+               T_META_ALL_VALID_ARCHS(YES), T_META_CHECK_LEAKS(NO))
+{
+       struct frame frames[10];
+       frames[0].frame = (uintptr_t)__builtin_frame_address(1);
+       frames[0].ret = (uintptr_t)__builtin_return_address(0);
+       do_stack_frame_decode_test(frames, 1, 10);
+}
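
The decode test checks captured frames in reverse; the same SPI also drives a plain frame-pointer backtrace. A sketch, capped at 16 frames (a real unwinder would also detect the base of the stack):

#include <stdint.h>
#include <stdio.h>
#include <pthread/stack_np.h>

static void
print_backtrace(void)
{
	uintptr_t frame = (uintptr_t)__builtin_frame_address(0);
	uintptr_t ret = 0;

	for (int depth = 0; frame != 0 && depth < 16; depth++) {
		frame = pthread_stack_frame_decode_np(frame, &ret);
		if (ret == 0)
			break;
		printf("#%d: return address %p\n", depth, (void *)ret);
	}
}
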
index a8dab42eb170c3b69a283b0126a0161220b28008..aaf483ebc8f58d1527c2cbc980c452c5241f53e9 100644 (file)
@@ -133,7 +133,7 @@ again:
 
        for (int i = 0; i < attempts; i++) {
                char *t;
-               asprintf(&t, "%s/%zd", tmp, i);
+               asprintf(&t, "%s/%d", tmp, i);
                T_QUIET; T_ASSERT_POSIX_SUCCESS(mkdir(t, 0700), "mkdir");
                setenv("BATS_TMP_DIR", t, 1); // hack to workaround rdar://33443485
                free(t);
@@ -144,7 +144,7 @@ again:
                                T_QUIET; T_FAIL("Helper should complete in <.1s");
                                goto timeout;
                        }
-                       usleep(1000);
+                       usleep(1000 * 100);
                } while (shmem->done <= i);
        }
        setenv("BATS_TMP_DIR", tmpdir, 1);
diff --git a/tests/stack_size.c b/tests/stack_size.c
new file mode 100644 (file)
index 0000000..3a52747
--- /dev/null
@@ -0,0 +1,81 @@
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "darwintest_defaults.h"
+
+#define PTHREAD_T_OFFSET (0)
+
+static void *
+function(void *arg)
+{
+       size_t expected_size = (size_t)(uintptr_t)arg;
+       T_ASSERT_EQ(pthread_get_stacksize_np(pthread_self()), expected_size,
+                       "saw expected pthread_get_stacksize_np");
+       return NULL;
+}
+
+T_DECL(stack_size_default, "stack size of default pthread",
+               T_META_ALL_VALID_ARCHS(YES))
+{
+       static const size_t dflsize = 512 * 1024;
+       pthread_t thread;
+       pthread_attr_t attr;
+
+       T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function,
+                       (void *)(dflsize + PTHREAD_T_OFFSET)), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);
+}
+
+T_DECL(stack_size_customsize, "stack size of thread with custom stack size",
+               T_META_ALL_VALID_ARCHS(YES))
+{
+       static const size_t stksize = 768 * 1024;
+       pthread_t thread;
+       pthread_attr_t attr;
+
+       T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_attr_setstacksize(&attr, stksize), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function,
+                       (void *)(stksize + PTHREAD_T_OFFSET)), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);
+}
+
+T_DECL(stack_size_customaddr, "stack size of thread with custom stack addr",
+               T_META_ALL_VALID_ARCHS(YES))
+{
+       static const size_t stksize = 512 * 1024;
+       pthread_t thread;
+       pthread_attr_t attr;
+
+       uintptr_t stackaddr = (uintptr_t)valloc(stksize);
+       stackaddr += stksize; // address is top of stack
+
+       T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_attr_setstackaddr(&attr, (void *)stackaddr),
+                       NULL);
+       T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function,
+                       (void *)stksize), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);
+       free((void *)(stackaddr - stksize));
+}
+
+T_DECL(stack_size_custom, "stack size of thread with custom stack addr+size",
+               T_META_ALL_VALID_ARCHS(YES))
+{
+       static const size_t stksize = 768 * 1024;
+       pthread_t thread;
+       pthread_attr_t attr;
+
+       uintptr_t stackaddr = (uintptr_t)valloc(stksize);
+       stackaddr += stksize; // address is top of stack
+
+       T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_attr_setstackaddr(&attr, (void *)stackaddr),
+                       NULL);
+       T_ASSERT_POSIX_ZERO(pthread_attr_setstacksize(&attr, stksize), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function,
+                       (void *)stksize), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);
+       free((void *)(stackaddr - stksize));
+}
index ecc64bc1987d46e94b7aa9873277a755bba39e8f..bb5380de1a4aa166537211c9f27d0bab5463e577 100755 (executable)
@@ -28,10 +28,26 @@ get_prefix = function(buf)
        local proc
        proc = buf.command
 
-       return string.format("%s %6.9f %-17s [%05d.%06x] %-24s",
+       return string.format("%s %6.9f %-17s [%05d.%06x] %-35s",
                prefix, secs, proc, buf.pid, buf.threadid, buf.debugname)
 end
 
+get_count = function(val)
+       return ((val & 0xffffff00) >> 8)
+end
+
+get_kwq_type = function(val)
+       if val & 0xff == 0x1 then
+               return "MTX"
+       elseif val & 0xff == 0x2 then
+               return "CVAR"
+       elseif val & 0xff == 0x4 then
+               return "RWL"
+       else
+               return string.format("0x%04x", val)
+       end
+end
+
 decode_lval = function(lval)
        local kbit = " "
        if lval & 0x1 ~= 0 then
@@ -61,61 +77,282 @@ decode_sval = function(sval)
        end
 
        local count = sval >> 8
-       return string.format("[0x%06x, %s%s]", count, ibit, sbit)
+       return string.format("[0x%06x,  %s%s]", count, ibit, sbit)
+end
+
+decode_cv_sval = function(sval)
+       local sbit = " "
+       if sval & 0x1 ~= 0 then
+               sbit = "C"
+       end
+       local ibit = " "
+       if sval & 0x2 ~= 0 then
+               ibit = "P"
+       end
+
+       local count = sval >> 8
+       return string.format("[0x%06x,  %s%s]", count, ibit, sbit)
 end
 
 trace_codename("psynch_mutex_lock_updatebits", function(buf)
        local prefix = get_prefix(buf)
        if buf[4] == 0 then
-               printf("%s\tupdated lock bits, pre-kernel (addr: 0x%016x, oldlval: %s, newlval: %s)\n", prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]))
+               printf("%s\tupdated lock bits, pre-kernel\taddr: 0x%016x\toldl: %s\tnewl: %s\n",
+                               prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]))
        else
-               printf("%s\tupdated lock bits, post-kernel (addr: 0x%016x, oldlval: %s, newlval: %s)\n", prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]))
+               printf("%s\tupdated lock bits, post-kernel\taddr: 0x%016x\toldl: %s\tnewl: %s\n",
+                               prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]))
        end
 end)
 
 trace_codename("psynch_mutex_unlock_updatebits", function(buf)
        local prefix = get_prefix(buf)
-       printf("%s\tupdated unlock bits (addr: 0x%016x, oldlval: %s, newlval: %s)\n", prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]))
+       printf("%s\tupdated unlock bits\t\taddr: 0x%016x\toldl: %s\tnewl: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]))
+end)
+
+trace_codename("psynch_ffmutex_lock_updatebits", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tlock path, bits update\t\taddr: 0x%016x\toldl: %s\toldu: %s\twaiters: %d\n",
+                               prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), get_count(buf[2]) - get_count(buf[3]))
+       else
+               printf("%s\tlock path, bits update\t\taddr: 0x%016x\tnewl: %s\tnewu: %s\twaiters: %d\n",
+                               prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), get_count(buf[2]) - get_count(buf[3]))
+       end
+end)
+
+trace_codename("psynch_ffmutex_unlock_updatebits", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tunlock path, update bits\taddr: 0x%016x\toldl: %s\tnewl: %s\tnewu: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), decode_sval(buf[4]))
+end)
+
+trace_codename("psynch_ffmutex_wake", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tfirst fit kernel wake\t\taddr: 0x%016x\tlval: %s\tuval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]))
+end)
+
+trace_codename("psynch_ffmutex_wait", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tfirstfit kernel wait\t\taddr: 0x%016x\tlval: %s\tuval: %s\tflags: 0x%x\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4])
+       else
+               printf("%s\tfirstfit kernel wait\t\taddr: 0x%016x\trval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]))
+       end
 end)
 
 trace_codename("psynch_mutex_ulock", function(buf)
        local prefix = get_prefix(buf)
 
        if trace.debugid_is_start(buf.debugid) then
 end)
 
 trace_codename("psynch_mutex_ulock", function(buf)
        local prefix = get_prefix(buf)
 
        if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tlock busy, waiting in kernel (addr: 0x%016x, lval: %s, sval: %s, owner_tid: 0x%x)\n",
+               printf("%s\tlock busy, waiting in kernel\taddr: 0x%016x\tlval: %s\tsval: %s\towner_tid: 0x%x\n",
                        prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4])
        elseif trace.debugid_is_end(buf.debugid) then
                        prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4])
        elseif trace.debugid_is_end(buf.debugid) then
-               printf("%s\tlock acquired from kernel (addr: 0x%016x, updated bits: %s)\n",
+               printf("%s\tlock acquired from kernel\taddr: 0x%016x\tupdt: %s\n",
                        prefix, buf[1], decode_lval(buf[2]))
        else
                        prefix, buf[1], decode_lval(buf[2]))
        else
-               printf("%s\tlock taken, uncontended (addr: 0x%016x, lval: %s, sval: %s)\n",
+               printf("%s\tlock taken userspace\t\taddr: 0x%016x\tlval: %s\tsval: %s\n",
                        prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]))
        end
 end)
 
 trace_codename("psynch_mutex_utrylock_failed", function(buf)
        local prefix = get_prefix(buf)
                        prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]))
        end
 end)
 
 trace_codename("psynch_mutex_utrylock_failed", function(buf)
        local prefix = get_prefix(buf)
-       printf("%s\tmutex trybusy addr: 0x%016x lval: %s sval: %s owner: 0x%x\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4])
+       printf("%s\tmutex trybusy\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\towner: 0x%x\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4])
 end)
 
 trace_codename("psynch_mutex_uunlock", function(buf)
        local prefix = get_prefix(buf)
 
        if trace.debugid_is_start(buf.debugid) then
 end)
 
 trace_codename("psynch_mutex_uunlock", function(buf)
        local prefix = get_prefix(buf)
 
        if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tunlock, signalling kernel waiters (addr: 0x%016x, lval: %s, sval: %s, owner_tid: 0x%x)\n",
+               printf("%s\tunlock, signalling kernel\taddr: 0x%016x\tlval: %s\tsval: %s\towner_tid: 0x%x\n",
                        prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4])
        elseif trace.debugid_is_end(buf.debugid) then
                        prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4])
        elseif trace.debugid_is_end(buf.debugid) then
-               printf("%s\tunlock, waiters signalled (addr: 0x%016x, updated bits: %s)\n",
+               printf("%s\tunlock, waiters signalled\taddr: 0x%016x\tupdt: %s\n",
                        prefix, buf[1], decode_lval(buf[2]))
        else
                        prefix, buf[1], decode_lval(buf[2]))
        else
-               printf("%s\tunlock, no kernel waiters (addr: 0x%016x, lval: %s, sval: %s)\n",
+               printf("%s\tunlock, no kernel waiters\taddr: 0x%016x\tlval: %s\tsval: %s\n",
                        prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]))
        end
 end)
 
                        prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]))
        end
 end)
 
--- The trace codes we need aren't enabled by default
-darwin.sysctlbyname("kern.pthread_debug_tracing", 1)
-completion_handler = function()
-       darwin.sysctlbyname("kern.pthread_debug_tracing", 0)
-end
-trace.set_completion_handler(completion_handler)
+trace_codename("psynch_mutex_clearprepost", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tclear prepost\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\n",
+               prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]))
+end)
+
+trace_codename("psynch_mutex_markprepost", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tmark prepost\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]))
+       else
+               printf("%s\tmark prepost\t\t\taddr: 0x%016x\tcleared: %d\n",
+                       prefix, buf[1], buf[2])
+       end
+end)
+
+trace_codename("psynch_mutex_kwqallocate", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tkernel kwq allocated\t\taddr: 0x%016x\ttype: %s\tkwq: 0x%016x\n",
+                       prefix, buf[1], get_kwq_type(buf[2]), buf[3])
+       elseif trace.debugid_is_end(buf.debugid) then
+               printf("%s\tkernel kwq allocated\t\taddr: 0x%016x\tlval: %s\tuval: %s\tsval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), decode_sval(buf[4]))
+       end
+end)
+
+trace_codename("psynch_mutex_kwqdeallocate", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tkernel kwq deallocated\t\taddr: 0x%016x\ttype: %s\tfreenow: %d\n",
+                       prefix, buf[1], get_kwq_type(buf[2]), buf[3])
+       elseif trace.debugid_is_end(buf.debugid) then
+               printf("%s\tkernel kwq deallocated\t\taddr: 0x%016x\tlval: %s\tuval: %s\tsval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), decode_sval(buf[4]))
+       end
+end)
+
+trace_codename("psynch_mutex_kwqprepost", function(buf)
+       local prefix = get_prefix(buf)
+       if buf[4] == 0 then
+               printf("%s\tkernel prepost incremented\taddr: 0x%016x\tlval: %s\tinqueue: %d\n",
+                       prefix, buf[1], decode_lval(buf[2]), buf[3])
+       elseif buf[4] == 1 then
+               printf("%s\tkernel prepost decremented\taddr: 0x%016x\tlval: %s\tremaining: %d\n",
+                       prefix, buf[1], decode_lval(buf[2]), buf[3])
+       elseif buf[4] == 2 then
+               printf("%s\tkernel prepost cleared\t\taddr: 0x%016x\tlval: %s\n", prefix,
+                       buf[1], decode_lval(buf[2]))
+       end
+end)
+
+trace_codename("psynch_mutex_kwqcollision", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tkernel kwq collision\t\taddr: 0x%016x\ttype: %d\n", prefix,
+               buf[1], buf[2])
+end)
+
+trace_codename("psynch_mutex_kwqsignal", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tkernel mutex signal\t\taddr: 0x%016x\tkwe: 0x%16x\ttid: 0x%x\tinqueue: %d\n",
+                       prefix, buf[1], buf[2], buf[3], buf[4]);
+       else
+               printf("%s\tkernel mutex signal\t\taddr: 0x%016x\tkwe: 0x%16x\tret: 0x%x\n",
+                       prefix, buf[1], buf[2], buf[3]);
+       end
+end)
+
+trace_codename("psynch_mutex_kwqwait", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tkernel mutex wait\t\taddr: 0x%016x\tinqueue: %d\tprepost: %d\tintr: %d\n",
+               prefix, buf[1], buf[2], buf[3], buf[4])
+end)
+
+trace_codename("psynch_cvar_kwait", function(buf)
+       local prefix = get_prefix(buf)
+       if buf[4] == 0 then
+               printf("%s\tkernel condvar wait\t\taddr: 0x%016x\tmutex: 0x%016x\tcgen: 0x%x\n",
+                       prefix, buf[1], buf[2], buf[3])
+       elseif buf[4] == 1 then
+               printf("%s\tkernel condvar sleep\t\taddr: 0x%016x\tflags: 0x%x\n",
+                       prefix, buf[1], buf[3])
+       elseif buf[4] == 2 then
+               printf("%s\tkernel condvar wait return\taddr: 0x%016x\terror: 0x%x\tupdt: 0x%x\n",
+                       prefix, buf[1], buf[2], buf[3])
+       elseif buf[4] == 3 and (buf[2] & 0xff) == 60 then
+               printf("%s\tkernel condvar timeout\t\taddr: 0x%016x\terror: 0x%x\n",
+                       prefix, buf[1], buf[2])
+       elseif buf[4] == 3 then
+               printf("%s\tkernel condvar wait error\taddr: 0x%016x\terror: 0x%x\n",
+                       prefix, buf[1], buf[2])
+       elseif buf[4] == 4 then
+               printf("%s\tkernel condvar wait return\taddr: 0x%016x\tupdt: 0x%x\n",
+                       prefix, buf[1], buf[2])
+       end
+end)
+
+trace_codename("psynch_cvar_clrprepost", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tkernel condvar clear prepost:\taddr: 0x%016x\ttype: 0x%x\tprepost seq: %s\n",
+               prefix, buf[1], buf[2], decode_lval(buf[3]))
+end)
+
+trace_codename("psynch_cvar_freeitems", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tcvar free fake/prepost items\taddr: 0x%016x\ttype: %d\t\t\tupto: %s\tall: %d\n",
+                       prefix, buf[1], buf[2], decode_lval(buf[3]), buf[4])
+       elseif trace.debugid_is_end(buf.debugid) then
+               printf("%s\tcvar free fake/prepost items\taddr: 0x%016x\tfreed: %d\tsignaled: %d\tinqueue: %d\n",
+                       prefix, buf[1], buf[2], buf[3], buf[4])
+       elseif buf[4] == 1 then
+               printf("%s\tcvar free, signalling waiter\taddr: 0x%016x\tinqueue: %d\tkwe: 0x%016x\n",
+                       prefix, buf[1], buf[3], buf[2])
+       elseif buf[4] == 2 then
+               printf("%s\tcvar free, removing fake\taddr: 0x%016x\tinqueue: %d\tkwe: 0x%016x\n",
+                       prefix, buf[1], buf[3], buf[2])
+       end
+end)
+
+trace_codename("psynch_cvar_signal", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tkernel cvar signal\t\taddr: 0x%016x\tfrom: %s\tupto: %s\tbroad: %d\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), buf[4])
+       elseif trace.debugid_is_end(buf.debugid) then
+               printf("%s\tkernel cvar signal\t\taddr: 0x%016x\tupdt: %s\n",
+                       prefix, buf[1], decode_cv_sval(buf[2]))
+       else
+               printf("%s\tkernel cvar signal\t\taddr: 0x%016x\tsignalled waiters (converted to broadcast: %d)\n",
+                       prefix, buf[1], buf[2])
+       end
+end)
+
+trace_codename("psynch_cvar_broadcast", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tupto: %s\tinqueue: %d\n",
+                       prefix, buf[1], decode_lval(buf[2]), buf[3])
+       elseif trace.debugid_is_end(buf.debugid) then
+               printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tupdt: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]))
+       elseif buf[4] == 1 then
+               printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tsignalling: 0x%16x\n",
+                       prefix, buf[1], buf[2])
+       elseif buf[4] == 2 then
+               printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tremoving fake: 0x%16x\tstate: %d\n",
+                       prefix, buf[1], buf[2], buf[3])
+       elseif buf[4] == 3 then
+               printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tprepost\tlval: %s\tsval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_cv_sval(buf[3]))
+       elseif buf[4] == 4 then
+               printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tbroadcast prepost: 0x%016x\n",
+                       prefix, buf[1], buf[2])
+       end
+end)
+
+trace_codename("psynch_cvar_zeroed", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tkernel cvar zeroed\t\taddr: 0x%016x\tlval: %s\tsval: %s\tinqueue: %d\n",
+               prefix, buf[1], decode_lval(buf[2]), decode_cv_sval(buf[3]), buf[4])
+end)
+
+trace_codename("psynch_cvar_updateval", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tcvar updateval\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\tupdateval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2] & 0xffffffff), decode_cv_sval(buf[2] >> 32), decode_cv_sval(buf[3]))
+       elseif trace.debugid_is_end(buf.debugid) then
+               printf("%s\tcvar updateval (updated)\taddr: 0x%016x\tlval: %s\tsval: %s\tdiffgen: %d\tneedsclear: %d\n",
+                       prefix, buf[1], decode_lval(buf[2] & 0xffffffff), decode_cv_sval(buf[2] >> 32), buf[3] >> 32, buf[3] & 0x1)
+       end
+end)
+
diff --git a/tools/wqtrace.lua b/tools/wqtrace.lua
deleted file mode 100755 (executable)
index 2da03da..0000000
+++ /dev/null
@@ -1,349 +0,0 @@
-#!/usr/local/bin/luatrace -s
-
-trace_codename = function(codename, callback)
-       local debugid = trace.debugid(codename)
-       if debugid ~= 0 then
-               trace.single(debugid,callback)
-       else
-               printf("WARNING: Cannot locate debugid for '%s'\n", codename)
-       end
-end
-
-initial_timestamp = 0
-workqueue_ptr_map = {};
-get_prefix = function(buf)
-       if initial_timestamp == 0 then
-               initial_timestamp = buf.timestamp
-       end
-       local secs = trace.convert_timestamp_to_nanoseconds(buf.timestamp - initial_timestamp) / 1000000000
-
-       local prefix
-       if trace.debugid_is_start(buf.debugid) then
-               prefix = "→"
-       elseif trace.debugid_is_end(buf.debugid) then
-               prefix = "←"
-       else
-               prefix = "↔"
-       end
-
-       local proc
-       if buf.command ~= "kernel_task" then
-               proc = buf.command
-               workqueue_ptr_map[buf[1]] = buf.command
-       elseif workqueue_ptr_map[buf[1]] ~= nil then
-               proc = workqueue_ptr_map[buf[1]]
-       else
-               proc = "UNKNOWN"
-       end
-
-       return string.format("%s %6.9f %-17s [%05d.%06x] %-24s",
-               prefix, secs, proc, buf.pid, buf.threadid, buf.debugname)
-end
-
-parse_pthread_priority = function(pri)
-       pri = pri & 0xffffffff
-       if (pri & 0x02000000) == 0x02000000 then
-               return "Manager"
-       end
-       local qos = (pri & 0x00ffff00) >> 8
-       if qos == 0x20 then
-               return string.format("UI[%x]", pri);
-       elseif qos == 0x10 then
-               return string.format("IN[%x]", pri);
-       elseif qos == 0x08 then
-               return string.format("DF[%x]", pri);
-       elseif qos == 0x04 then
-               return string.format("UT[%x]", pri);
-       elseif qos == 0x02 then
-               return string.format("BG[%x]", pri);
-       elseif qos == 0x01 then
-               return string.format("MT[%x]", pri);
-       elseif qos == 0x00 then
-               return string.format("--[%x]", pri);
-       else
-               return string.format("??[%x]", pri);
-       end
-end
-
-parse_qos_bucket = function(pri)
-       if pri == 0 then
-               return string.format("UI[%x]", pri);
-       elseif pri == 1 then
-               return string.format("IN[%x]", pri);
-       elseif pri == 2 then
-               return string.format("DF[%x]", pri);
-       elseif pri == 3 then
-               return string.format("UT[%x]", pri);
-       elseif pri == 4 then
-               return string.format("BG[%x]", pri);
-       elseif pri == 5 then
-               return string.format("MT[%x]", pri);
-       elseif pri == 6 then
-               return string.format("MG[%x]", pri);
-       else
-               return string.format("??[%x]", pri);
-       end
-end
-
-parse_thactive_req_bucket = function(pri)
-    if pri ~= 6 then
-        return parse_qos_bucket(pri)
-    end
-    return "None"
-end
-
-get_thactive = function(low, high)
-    return string.format("req: %s, MG: %d, UI: %d, IN: %d, DE: %d, UT: %d, BG: %d, MT: %d",
-           parse_thactive_req_bucket(high >> (16 * 3)), (high >> (2 * 16)) & 0xffff,
-           (low  >> (0 * 16)) & 0xffff, (low  >> (1 * 16)) & 0xffff,
-           (low  >> (2 * 16)) & 0xffff, (low  >> (3 * 16)) & 0xffff,
-           (high >> (0 * 16)) & 0xffff, (high >> (1 * 16)) & 0xffff)
-end
-
--- workqueue lifecycle
-
-trace_codename("wq_pthread_exit", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tprocess is exiting\n",prefix)
-       else
-               printf("%s\tworkqueue marked as exiting and timer is complete\n",prefix)
-       end
-end)
-
-trace_codename("wq_workqueue_exit", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tall threads have exited, cleaning up\n",prefix)
-       else
-               printf("%s\tclean up complete\n",prefix)
-       end
-end)
-
-trace_codename("wq_start_add_timer", function(buf)
-       local prefix = get_prefix(buf)
-       printf("%s\tarming timer to fire in %d us (flags: %x, reqcount: %d)\n",
-               prefix, buf.arg4, buf.arg3, buf.arg2)
-end)
-
-trace_codename("wq_add_timer", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tadd_timer fired (flags: %x, nthreads: %d, thidlecount: %d)\n",
-                       prefix, buf.arg2, buf.arg3, buf.arg4)
-       elseif trace.debugid_is_end(buf.debugid) then
-               printf("%s\tadd_timer completed (start_timer: %x, nthreads: %d, thidlecount: %d)\n",
-                       prefix, buf.arg2, buf.arg3, buf.arg4)
-       else
-               printf("%s\tadd_timer added threads (reqcount: %d, thidlecount: %d, busycount: %d)\n",
-                       prefix, buf.arg2, buf.arg3, buf.arg4)
-
-       end
-end)
-
-trace_codename("wq_run_threadreq", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               if buf[2] > 0 then
-                       printf("%s\trun_threadreq: %x (priority: %s, flags: %d) on %x\n",
-                                       prefix, buf[2], parse_qos_bucket(buf[4] >> 16), buf[4] & 0xff, buf[3])
-               else
-                       printf("%s\trun_threadreq: <none> on %x\n",
-                                       prefix, buf[3])
-               end
-       else
-               if buf[2] == 1 then
-                       printf("%s\tpended event manager, already running\n", prefix)
-               elseif buf[2] == 2 then
-                       printf("%s\tnothing to do\n", prefix)
-               elseif buf[2] == 3 then
-                       printf("%s\tno eligible request found\n", prefix)
-               elseif buf[2] == 4 then
-                       printf("%s\tadmission control failed\n", prefix)
-               elseif buf[2] == 5 then
-                       printf("%s\tunable to add new thread (may_add_new_thread: %d, nthreads: %d)\n", prefix, buf[3], buf[4])
-               elseif buf[2] == 6 then
-                       printf("%s\tthread creation failed\n", prefix)
-               elseif buf[2] == 0 then
-                       printf("%s\tsuccess\n", prefix)
-               else
-                       printf("%s\tWARNING: UNKNOWN END CODE:%d\n", prefix, buf.arg4)
-               end
-       end
-end)
-
-trace_codename("wq_run_threadreq_mgr_merge", function(buf)
-       local prefix = get_prefix(buf)
-       printf("%s\t\tmerging incoming manager request into existing\n", prefix)
-end)
-
-trace_codename("wq_run_threadreq_req_select", function(buf)
-       local prefix = get_prefix(buf)
-       if buf[3] == 1 then
-               printf("%s\t\tselected event manager request %x\n", prefix, buf[2])
-       elseif buf[3] == 2 then
-               printf("%s\t\tselected overcommit request %x\n", prefix, buf[2])
-       elseif buf[3] == 3 then
-               printf("%s\t\tselected constrained request %x\n", prefix, buf[2])
-       else
-		printf("%s\t\tWARNING: UNKNOWN DECISION CODE:%d\n", prefix, buf[3])
-       end
-end)
-
-trace_codename("wq_run_threadreq_thread_select", function(buf)
-       local prefix = get_prefix(buf)
-       if buf[2] == 1 then
-               printf("%s\t\trunning on current thread %x\n", prefix, buf[3])
-       elseif buf[2] == 2 then
-               printf("%s\t\trunning on idle thread %x\n", prefix, buf[3])
-       elseif buf[2] == 3 then
-               printf("%s\t\tcreated new thread\n", prefix)
-       else
-		printf("%s\t\tWARNING: UNKNOWN DECISION CODE:%d\n", prefix, buf[2])
-       end
-end)
-
-trace_codename("wq_thread_reset_priority", function(buf)
-       local prefix = get_prefix(buf)
-       local old_qos = buf[3] >> 16;
-       local new_qos = buf[3] & 0xff;
-       if buf[4] == 1 then
-               printf("%s\t\treset priority of %x from %s to %s\n", prefix, buf[2], parse_qos_bucket(old_qos), parse_qos_bucket(new_qos))
-       elseif buf[4] == 2 then
-               printf("%s\t\treset priority of %x from %s to %s for reserve manager\n", prefix, buf[2], parse_qos_bucket(old_qos), parse_qos_bucket(new_qos))
-       elseif buf[4] == 3 then
-               printf("%s\t\treset priority of %x from %s to %s for cleanup\n", prefix, buf[2], parse_qos_bucket(old_qos), parse_qos_bucket(new_qos))
-       end
-end)
-
-trace_codename("wq_thread_park", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tthread parking\n", prefix)
-       else
-               printf("%s\tthread woken\n", prefix)
-       end
-end)
-
-trace_codename("wq_thread_squash", function(buf)
-       local prefix = get_prefix(buf)
-       printf("%s\tthread squashed from %s to %s\n", prefix,
-                       parse_qos_bucket(buf[2]), parse_qos_bucket(buf[3]))
-end)
-
-trace.enable_thread_cputime()
-runitem_time_map = {}
-runitem_cputime_map = {}
-trace_codename("wq_runitem", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               runitem_time_map[buf.threadid] = buf.timestamp;
-               runitem_cputime_map[buf.threadid] = trace.cputime_for_thread(buf.threadid);
-
-               printf("%s\tSTART running item @ %s\n", prefix, parse_qos_bucket(buf[3]))
-       elseif runitem_time_map[buf.threadid] then
-               local time = buf.timestamp - runitem_time_map[buf.threadid]
-               local cputime = trace.cputime_for_thread(buf.threadid) - runitem_cputime_map[buf.threadid]
-
-               local time_ms = trace.convert_timestamp_to_nanoseconds(time) / 1000000
-               local cputime_ms = trace.convert_timestamp_to_nanoseconds(cputime) / 1000000
-
-               printf("%s\tDONE running item @ %s: time = %6.6f ms, cputime = %6.6f ms\n",
-                               prefix, parse_qos_bucket(buf[2]), time_ms, cputime_ms)
-
-               runitem_time_map[buf.threadid] = 0
-               runitem_cputime_map[buf.threadid] = 0
-       else
-               printf("%s\tDONE running item @ %s\n", prefix, parse_qos_bucket(buf[2]))
-       end
-end)
-
-trace_codename("wq_runthread", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tSTART running thread\n", prefix)
-       elseif trace.debugid_is_end(buf.debugid) then
-               printf("%s\tDONE running thread\n", prefix)
-       end
-end)
-
-trace_codename("wq_thactive_update", function(buf)
-    local prefix = get_prefix(buf)
-    local thactive = get_thactive(buf[2], buf[3])
-    if buf[1] == 1 then
-        printf("%s\tthactive constrained pre-post (%s)\n", prefix, thactive)
-    elseif buf[1] == 2 then
-        printf("%s\tthactive constrained run (%s)\n", prefix, thactive)
-    else
-        return
-    end
-end)
-
-trace_codename("wq_thread_block", function(buf)
-       local prefix = get_prefix(buf)
-        local req_pri = parse_thactive_req_bucket(buf[3] >> 8)
-       if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tthread blocked (activecount: %d, priority: %s, req_pri: %s, reqcount: %d, start_timer: %d)\n",
-                       prefix, buf[2], parse_qos_bucket(buf[3] & 0xff), req_pri, buf[4] >> 1, buf[4] & 0x1)
-       else
-               printf("%s\tthread unblocked (activecount: %d, priority: %s, req_pri: %s, threads_scheduled: %d)\n",
-                       prefix, buf[2], parse_qos_bucket(buf[3] & 0xff), req_pri, buf[4])
-       end
-end)
-
-trace_codename("wq_thread_create_failed", function(buf)
-       local prefix = get_prefix(buf)
-       if buf[3] == 0 then
-               printf("%s\tfailed to create new workqueue thread, kern_return: 0x%x\n",
-                       prefix, buf[2])
-       elseif buf[3] == 1 then
-               printf("%s\tfailed to vm_map workq thread stack: 0x%x\n", prefix, buf[2])
-       elseif buf[3] == 2 then
-               printf("%s\tfailed to vm_protect workq thread guardsize: 0x%x\n", prefix, buf[2])
-       end
-end)
-
-trace_codename("wq_thread_create", function(buf)
-       printf("%s\tcreated new workqueue thread\n", get_prefix(buf))
-end)
-
-trace_codename("wq_wqops_reqthreads", function(buf)
-       local prefix = get_prefix(buf)
-       printf("%s\tuserspace requested %d threads at %s\n", prefix, buf[2], parse_pthread_priority(buf[3]));
-end)
-
-trace_codename("wq_kevent_reqthreads", function(buf)
-       local prefix = get_prefix(buf)
-       if buf[4] == 0 then
-               printf("%s\tkevent requested a thread at %s\n", prefix, parse_pthread_priority(buf[3]));
-       elseif buf[4] == 1 then
-               printf("%s\tworkloop requested a thread for req %x at %s\n", prefix, buf[2], parse_pthread_priority(buf[3]));
-       elseif buf[4] == 2 then
-               printf("%s\tworkloop updated priority of req %x to %s\n", prefix, buf[2], parse_pthread_priority(buf[3]));
-       elseif buf[4] == 3 then
-               printf("%s\tworkloop canceled req %x\n", prefix, buf[2], parse_pthread_priority(buf[3]));
-       elseif buf[4] == 4 then
-               printf("%s\tworkloop redrove a thread request\n", prefix);
-       end
-end)
-
-trace_codename("wq_constrained_admission", function(buf)
-       local prefix = get_prefix(buf)
-       if buf[2] == 1 then
-               printf("fail: %s\twq_constrained_threads_scheduled=%d >= wq_max_constrained_threads=%d\n",
-                prefix, buf[3], buf[4])
-       elseif (buf[2] == 2) or (buf[2] == 3) then
-               local success = nil;
-               if buf[2] == 2 then success = "success"
-               else success = "fail" end
-               printf("%s: %s\tthactive_count=%d + busycount=%d >= wq->wq_max_concurrency\n",
-                               prefix, success, buf[3], buf[4])
-       end
-end)
-
--- The trace codes we need aren't enabled by default
-darwin.sysctlbyname("kern.pthread_debug_tracing", 1)
-completion_handler = function()
-       darwin.sysctlbyname("kern.pthread_debug_tracing", 0)
-end
-trace.set_completion_handler(completion_handler)
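For readers decoding old wqtrace output, the pthread_priority_t unpacking performed by parse_pthread_priority above can be exercised standalone. A minimal sketch condensed from the deleted helper (Lua 5.3+ for the bitwise operators); the sample inputs are illustrative values, not captured trace data.

-- Condensed from parse_pthread_priority above: the event-manager flag
-- wins outright, otherwise the QoS class byte selects the bucket name.
local function qos_class(pri)
	pri = pri & 0xffffffff
	if (pri & 0x02000000) ~= 0 then return "Manager" end
	local qos = (pri & 0x00ffff00) >> 8
	local names = { [0x20] = "UI", [0x10] = "IN", [0x08] = "DF",
	                [0x04] = "UT", [0x02] = "BG", [0x01] = "MT", [0x00] = "--" }
	return names[qos] or "??"
end

print(qos_class(0x02000000))  --> Manager
print(qos_class(0x00002000))  --> UI (user-interactive)
print(qos_class(0x00000800))  --> DF (default)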
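get_thactive above likewise unpacks per-bucket thread counts from two 64-bit words, sixteen bits per bucket, with the requested bucket kept in the top sixteen bits of the high word. A small worked example with made-up counts:

-- Pack two UI threads (bits 0..15 of the low word) and one BG thread
-- (bits 0..15 of the high word) the way get_thactive expects them.
local low  = 2 << (0 * 16)
local high = 1 << (0 * 16)
print((low  >> (0 * 16)) & 0xffff)  --> 2 (UI bucket)
print((high >> (0 * 16)) & 0xffff)  --> 1 (BG bucket)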
index e50ee44abe044946da915f5183af36116a81e5c6..9501f969e6bd98298022414bb311804ac367b7ed 100644 (file)
@@ -2,4 +2,17 @@
 # install the pthread lldbmacros into the module
 
 mkdir -p $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python || true
-rsync -aq $SRCROOT/lldbmacros/* $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python/
+rsync -aq $SRCROOT/lldbmacros/* $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python
+
+for variant in $BUILD_VARIANTS; do
+       case $variant in
+       normal)
+               SUFFIX=""
+               ;;
+       *)
+               SUFFIX="_$variant"
+               ;;
+       esac
+
+       ln -sf init.py $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python/$EXECUTABLE_NAME$SUFFIX.py
+done
index fcd42ea2477ea2d531e600d310881b99ad94be5a..84e90796bddc820d2695bfef0642ac03a1429923 100644 (file)
@@ -41,7 +41,7 @@ LLVM_LTO_development = NO
 LLVM_LTO_kasan = NO
 LLVM_LTO = $(LLVM_LTO_$(PTHREAD_VARIANT))
 
-GCC_PREPROCESSOR_DEFINITIONS_kext = XNU_KERNEL_PRIVATE MACH_KERNEL_PRIVATE ABSOLUTETIME_SCALAR_TYPE NEEDS_SCHED_CALL_T
+GCC_PREPROCESSOR_DEFINITIONS_kext = XNU_KERNEL_PRIVATE MACH_KERNEL_PRIVATE ABSOLUTETIME_SCALAR_TYPE NEEDS_SCHED_CALL_T __PTHREAD_EXPOSE_INTERNALS__
 GCC_PREPROCESSOR_DEFINITIONS_kext_development = MACH_ASSERT DEBUG
 GCC_PREPROCESSOR_DEFINITIONS = $(GCC_PREPROCESSOR_DEFINITIONS_kext) $(GCC_PREPROCESSOR_DEFINITIONS_kext_$(PTHREAD_VARIANT))
 
diff --git a/xcodescripts/pthread.dirty b/xcodescripts/pthread.dirty
new file mode 100644 (file)
index 0000000..2a8f66e
--- /dev/null
@@ -0,0 +1,33 @@
+# cacheline-aligned
+
+# uint64_t sized
+___pthread_stack_hint
+
+# pointer-sized
+___libdispatch_keventfunction
+___libdispatch_workerfunction
+___libdispatch_workloopfunction
+___pthread_head
+__main_thread_ptr
+__pthread_free
+__pthread_keys
+__pthread_malloc
+__pthread_ptr_munge_token
+_exitf
+
+# int-sized
+___is_threaded
+___libdispatch_offset
+___pthread_supported_features
+___pthread_tsd_lock
+___pthread_tsd_max
+___unix_conforming
+__main_qos
+__pthread_count
+__pthread_list_lock
+
+# byte-sized
+___workq_newapi
+_default_priority
+_max_priority
+_min_priority
index 7b2f244451ac6e034c58655f3a4dfe0024134d22..1dedcaa5a2353f880edeef01161f7aff885d34e0 100644 (file)
@@ -57,7 +57,7 @@ DISABLED_WARNING_CFLAGS = -Wno-int-conversion -Wno-missing-prototypes -Wno-sign-
 WARNING_CFLAGS = -Wall -Wextra -Warray-bounds-pointer-arithmetic -Wcomma -Wconditional-uninitialized -Wcovered-switch-default -Wdate-time -Wdeprecated -Wdouble-promotion -Wduplicate-enum -Wfloat-equal -Widiomatic-parentheses -Wignored-qualifiers -Wimplicit-fallthrough -Wmissing-noreturn -Wnullable-to-nonnull-conversion -Wover-aligned -Wpointer-arith -Wstatic-in-inline -Wtautological-compare -Wunguarded-availability -Wunused $(NO_WARNING_CFLAGS) $(DISABLED_WARNING_CFLAGS)
 NO_WARNING_CFLAGS = -Wno-pedantic -Wno-bad-function-cast -Wno-c++98-compat-pedantic -Wno-cast-align -Wno-cast-qual -Wno-disabled-macro-expansion -Wno-documentation-unknown-command -Wno-format-nonliteral -Wno-missing-variable-declarations -Wno-packed -Wno-padded -Wno-reserved-id-macro -Wno-switch-enum -Wno-undef -Wno-unreachable-code-aggressive -Wno-unused-macros -Wno-used-but-marked-unused
 
-BASE_PREPROCESSOR_MACROS = __LIBC__ __DARWIN_UNIX03=1 __DARWIN_64_BIT_INO_T=1 __DARWIN_NON_CANCELABLE=1 __DARWIN_VERS_1050=1 _FORTIFY_SOURCE=0 __PTHREAD_BUILDING_PTHREAD__=1 $(SIM_PREPROCESSOR_MACROS)
+BASE_PREPROCESSOR_MACROS = __LIBC__ __DARWIN_UNIX03=1 __DARWIN_64_BIT_INO_T=1 __DARWIN_NON_CANCELABLE=1 __DARWIN_VERS_1050=1 _FORTIFY_SOURCE=0 __PTHREAD_BUILDING_PTHREAD__=1 $(SIM_PREPROCESSOR_MACROS) __PTHREAD_EXPOSE_INTERNALS__
 GCC_PREPROCESSOR_DEFINITIONS = $(BASE_PREPROCESSOR_MACROS) $(PLATFORM_PREPROCESSOR_DEFINITIONS)
 
 // TODO: Remove -fstack-protector on _debug when it is moved to libplatform
@@ -68,8 +68,10 @@ OTHER_CFLAGS_debug = -fno-inline -O0 -DDEBUG=1
 LINK_WITH_STANDARD_LIBRARIES = NO
 DYLIB_CURRENT_VERSION = $(RC_ProjectSourceVersion)
 DYLIB_COMPATIBILITY_VERSION = 1
+DIRTY_LDFLAGS = -Wl,-dirty_data_list,$(SRCROOT)/xcodescripts/pthread.dirty
+DIRTY_LDFLAGS[sdk=macos*] =
 DYLIB_LDFLAGS = -Wl,-alias_list,$(SRCROOT)/xcodescripts/pthread.aliases -Wl,-umbrella,System -L/usr/lib/system -lsystem_kernel -lsystem_platform -ldyld -lcompiler_rt
-OTHER_LDFLAGS = $(DYLIB_LDFLAGS) $(CR_LDFLAGS) $(PLATFORM_LDFLAGS)
+OTHER_LDFLAGS = $(DYLIB_LDFLAGS) $(DIRTY_LDFLAGS) $(CR_LDFLAGS) $(PLATFORM_LDFLAGS)
 
 // Simulator build rules
 EXCLUDED_SOURCE_FILE_NAMES[sdk=iphonesimulator*] = *.c *.s
index 2b33118d6124a08ce3e573792e5bb6b9b90c60f1..863252aedf7a23ab0cc70e38d85825ad927de98e 100644 (file)
@@ -1,6 +1,6 @@
 #include "pthread.xcconfig"
 
-SUPPORTED_PLATFORMS = iphoneos appletvos watchos
+SUPPORTED_PLATFORMS = iphoneos
 PRODUCT_NAME = pthread_$(RESOLVED_VARIANT)
 OTHER_LDFLAGS =
 SKIP_INSTALL = YES