git.saurik.com Git - apple/libpthread.git/commitdiff
snapshot: libpthread-330.201.1.tar.gz    tags: macos-1014, v330.201.1
author    Apple <opensource@apple.com>
          Thu, 4 Oct 2018 22:01:40 +0000 (22:01 +0000)
committer Apple <opensource@apple.com>
          Thu, 4 Oct 2018 22:01:40 +0000 (22:01 +0000)
52 files changed:
kern/kern_init.c
kern/kern_internal.h
kern/kern_policy.c [deleted file]
kern/kern_support.c
kern/kern_synch.c
kern/kern_trace.h
kern/synch_internal.h
kern/workqueue_internal.h
libpthread.xcodeproj/project.pbxproj
lldbmacros/init.py [new file with mode: 0644]
lldbmacros/pthread.py [deleted file]
man/pthread_mutexattr.3
private/dependency_private.h [new file with mode: 0644]
private/private.h
private/qos_private.h
private/tsd_private.h
private/workqueue_private.h
pthread/introspection.h
pthread/pthread.h
pthread/pthread_spis.h
pthread/stack_np.h [new file with mode: 0644]
src/internal.h
src/offsets.h [new file with mode: 0644]
src/pthread.c
src/pthread_asm.s
src/pthread_cancelable.c
src/pthread_cond.c
src/pthread_dependency.c [new file with mode: 0644]
src/pthread_mutex.c
src/pthread_rwlock.c
src/pthread_tsd.c
src/qos.c
src/thread_setup.c
tests/Makefile
tests/cond_prepost.c [new file with mode: 0644]
tests/main_stack_custom.c
tests/mutex.c
tests/mutex_prepost.c [new file with mode: 0644]
tests/perf_contended_mutex_rwlock.c [new file with mode: 0644]
tests/pthread_dependency.c [new file with mode: 0644]
tests/pthread_threadid_np.c
tests/rdar_32848402.c
tests/stack.c [new file with mode: 0644]
tests/stack_aslr.c
tests/stack_size.c [new file with mode: 0644]
tools/locktrace.lua
tools/wqtrace.lua [deleted file]
xcodescripts/install-lldbmacros.sh
xcodescripts/kext.xcconfig
xcodescripts/pthread.dirty [new file with mode: 0644]
xcodescripts/pthread.xcconfig
xcodescripts/resolved.xcconfig

diff --git a/kern/kern_init.c b/kern/kern_init.c
index 3de9b5d034ed71a41164c8f94de37d1534db8b10..3321483ffafae2a6e7bf650006bee3242a3358cc 100644
@@ -17,21 +17,12 @@ pthread_callbacks_t pthread_kern;
 
 const struct pthread_functions_s pthread_internal_functions = {
        .pthread_init = _pthread_init,
-       .fill_procworkqueue = (int(*)(proc_t, void*))_fill_procworkqueue,
-       .get_pwq_state_kdp = _get_pwq_state_kdp,
-       .workqueue_exit = _workqueue_exit,
-       .workqueue_mark_exiting = _workqueue_mark_exiting,
-       .workqueue_thread_yielded = _workqueue_thread_yielded,
-       .workqueue_get_sched_callback = _workqueue_get_sched_callback,
        .pth_proc_hashinit = _pth_proc_hashinit,
        .pth_proc_hashdelete = _pth_proc_hashdelete,
        .bsdthread_create = _bsdthread_create,
        .bsdthread_register = _bsdthread_register,
        .bsdthread_terminate = _bsdthread_terminate,
-       .bsdthread_ctl = _bsdthread_ctl,
        .thread_selfid = _thread_selfid,
-       .workq_kernreturn = _workq_kernreturn,
-       .workq_open = _workq_open,
 
        .psynch_mutexwait = _psynch_mutexwait,
        .psynch_mutexdrop = _psynch_mutexdrop,
@@ -48,12 +39,11 @@ const struct pthread_functions_s pthread_internal_functions = {
        .pthread_find_owner = _pthread_find_owner,
        .pthread_get_thread_kwq = _pthread_get_thread_kwq,
 
-       .workq_reqthreads = _workq_reqthreads,
-       .thread_qos_from_pthread_priority = _thread_qos_from_pthread_priority,
-       .pthread_priority_canonicalize2 = _pthread_priority_canonicalize,
-       .workq_thread_has_been_unbound = _workq_thread_has_been_unbound,
-       .workq_threadreq = workq_kern_threadreq,
-       .workq_threadreq_modify = workq_kern_threadreq_modify,
+       .workq_create_threadstack = workq_create_threadstack,
+       .workq_destroy_threadstack = workq_destroy_threadstack,
+       .workq_setup_thread = workq_setup_thread,
+       .workq_handle_stack_events = workq_handle_stack_events,
+       .workq_markfree_threadstack = workq_markfree_threadstack,
 };
 
 kern_return_t pthread_start(__unused kmod_info_t * ki, __unused void *d)
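
The context line above is the kext entry point, which this commit leaves alone: it hands the (now much smaller) function table to xnu and receives the kernel's callback table in return. A minimal sketch of that handshake, assuming xnu's pthread_kext_register() shim interface:

	#include <sys/pthread_shims.h>

	kern_return_t
	pthread_start(__unused kmod_info_t *ki, __unused void *d)
	{
		/* xnu keeps the kext's entry points; pthread_kern is filled in
		 * with the kernel's callback table for the kext to call into. */
		pthread_kext_register((pthread_functions_t)&pthread_internal_functions,
				&pthread_kern);
		return KERN_SUCCESS;
	}
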
diff --git a/kern/kern_internal.h b/kern/kern_internal.h
index fa2c27b926f3c89f65312bbe8a6123db96c69380..bb29cdc6cb0af7f59d4ba245d4816fc83d524bfc 100644
 #ifndef _SYS_PTHREAD_INTERNAL_H_
 #define _SYS_PTHREAD_INTERNAL_H_
 
+#include <pthread/bsdthread_private.h>
+#include <pthread/priority_private.h>
+#include <pthread/workqueue_syscalls.h>
+
 #ifdef KERNEL
+struct ksyn_waitq_element;
 #include <stdatomic.h>
 #include <kern/thread_call.h>
 #include <kern/kcdata.h>
 #define PTHREAD_FEATURE_WORKLOOP          0x80         /* supports workloops */
 #define PTHREAD_FEATURE_QOS_DEFAULT            0x40000000      /* the kernel supports QOS_CLASS_DEFAULT */
 
-/* pthread bsdthread_ctl sysctl commands */
-#define BSDTHREAD_CTL_SET_QOS                          0x10    /* bsdthread_ctl(BSDTHREAD_CTL_SET_QOS, thread_port, tsd_entry_addr, 0) */
-#define BSDTHREAD_CTL_GET_QOS                          0x20    /* bsdthread_ctl(BSDTHREAD_CTL_GET_QOS, thread_port, 0, 0) */
-#define BSDTHREAD_CTL_QOS_OVERRIDE_START       0x40    /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_START, thread_port, priority, 0) */
-#define BSDTHREAD_CTL_QOS_OVERRIDE_END         0x80    /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_END, thread_port, 0, 0) */
-#define BSDTHREAD_CTL_SET_SELF                         0x100   /* bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, priority, voucher, flags) */
-#define BSDTHREAD_CTL_QOS_OVERRIDE_RESET       0x200   /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_RESET, 0, 0, 0) */
-#define BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH    0x400   /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH, thread_port, priority, 0) */
-#define BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD                           0x401   /* bsdthread_ctl(BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD, thread_port, priority, resource) */
-#define BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET                         0x402   /* bsdthread_ctl(BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET, 0|1 (?reset_all), resource, 0) */
-#define BSDTHREAD_CTL_QOS_MAX_PARALLELISM      0x800   /* bsdthread_ctl(BSDTHREAD_CTL_QOS_MAX_PARALLELISM, priority, flags, 0) */
-
-/* qos_class_t is mapped into one of these bits in the bitfield, this mapping now exists here because
- * libdispatch requires the QoS class mask of the pthread_priority_t to be a bitfield.
- */
-#define __PTHREAD_PRIORITY_CBIT_USER_INTERACTIVE 0x20
-#define __PTHREAD_PRIORITY_CBIT_USER_INITIATED 0x10
-#define __PTHREAD_PRIORITY_CBIT_DEFAULT 0x8
-#define __PTHREAD_PRIORITY_CBIT_UTILITY 0x4
-#define __PTHREAD_PRIORITY_CBIT_BACKGROUND 0x2
-#define __PTHREAD_PRIORITY_CBIT_MAINTENANCE 0x1
-#define __PTHREAD_PRIORITY_CBIT_UNSPECIFIED 0x0
-
-static inline int
-_pthread_qos_class_to_thread_qos(qos_class_t qos)
-{
-       switch (qos) {
-       case QOS_CLASS_USER_INTERACTIVE: return THREAD_QOS_USER_INTERACTIVE;
-       case QOS_CLASS_USER_INITIATED: return THREAD_QOS_USER_INITIATED;
-       case QOS_CLASS_DEFAULT: return THREAD_QOS_LEGACY;
-       case QOS_CLASS_UTILITY: return THREAD_QOS_UTILITY;
-       case QOS_CLASS_BACKGROUND: return THREAD_QOS_BACKGROUND;
-       case QOS_CLASS_MAINTENANCE: return THREAD_QOS_MAINTENANCE;
-       default: return THREAD_QOS_UNSPECIFIED;
-       }
-}
-
-static inline pthread_priority_t
-_pthread_priority_make_newest(qos_class_t qc, int rel, unsigned long flags)
-{
-       pthread_priority_t cls;
-       switch (qc) {
-               case QOS_CLASS_USER_INTERACTIVE: cls = __PTHREAD_PRIORITY_CBIT_USER_INTERACTIVE; break;
-               case QOS_CLASS_USER_INITIATED: cls = __PTHREAD_PRIORITY_CBIT_USER_INITIATED; break;
-               case QOS_CLASS_DEFAULT: cls = __PTHREAD_PRIORITY_CBIT_DEFAULT; break;
-               case QOS_CLASS_UTILITY: cls = __PTHREAD_PRIORITY_CBIT_UTILITY; break;
-               case QOS_CLASS_BACKGROUND: cls = __PTHREAD_PRIORITY_CBIT_BACKGROUND; break;
-               case QOS_CLASS_MAINTENANCE: cls = __PTHREAD_PRIORITY_CBIT_MAINTENANCE; break;
-               case QOS_CLASS_UNSPECIFIED:
-               default:
-                       cls = __PTHREAD_PRIORITY_CBIT_UNSPECIFIED;
-                       rel = 1; // results in priority bits == 0 <rdar://problem/16184900>
-                       break;
-       }
-
-       pthread_priority_t p =
-               (flags & _PTHREAD_PRIORITY_FLAGS_MASK) |
-               ((cls << _PTHREAD_PRIORITY_QOS_CLASS_SHIFT) & _PTHREAD_PRIORITY_QOS_CLASS_MASK) |
-               (((uint8_t)rel - 1) & _PTHREAD_PRIORITY_PRIORITY_MASK);
-
-       return p;
-}
-
-static inline qos_class_t
-_pthread_priority_get_qos_newest(pthread_priority_t priority)
-{
-       qos_class_t qc;
-       switch ((priority & _PTHREAD_PRIORITY_QOS_CLASS_MASK) >> _PTHREAD_PRIORITY_QOS_CLASS_SHIFT) {
-               case __PTHREAD_PRIORITY_CBIT_USER_INTERACTIVE: qc = QOS_CLASS_USER_INTERACTIVE; break;
-               case __PTHREAD_PRIORITY_CBIT_USER_INITIATED: qc = QOS_CLASS_USER_INITIATED; break;
-               case __PTHREAD_PRIORITY_CBIT_DEFAULT: qc = QOS_CLASS_DEFAULT; break;
-               case __PTHREAD_PRIORITY_CBIT_UTILITY: qc = QOS_CLASS_UTILITY; break;
-               case __PTHREAD_PRIORITY_CBIT_BACKGROUND: qc = QOS_CLASS_BACKGROUND; break;
-               case __PTHREAD_PRIORITY_CBIT_MAINTENANCE: qc = QOS_CLASS_MAINTENANCE; break;
-               case __PTHREAD_PRIORITY_CBIT_UNSPECIFIED:
-               default: qc = QOS_CLASS_UNSPECIFIED; break;
-       }
-       return qc;
-}
-
-#define _pthread_priority_get_relpri(priority) \
-       ((int8_t)((priority & _PTHREAD_PRIORITY_PRIORITY_MASK) >> _PTHREAD_PRIORITY_PRIORITY_SHIFT) + 1)
-
-#define _pthread_priority_get_flags(priority) \
-       (priority & _PTHREAD_PRIORITY_FLAGS_MASK)
-
-#define _pthread_priority_split_newest(priority, qos, relpri) \
-       ({ qos = _pthread_priority_get_qos_newest(priority); \
-          relpri = (qos == QOS_CLASS_UNSPECIFIED) ? 0 : \
-                  _pthread_priority_get_relpri(priority); \
-       })
-
-#define _PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL 0x1
-#define _PTHREAD_QOS_PARALLELISM_REALTIME 0x2
-
 /* userspace <-> kernel registration struct, for passing data to/from the kext during main thread init. */
 struct _pthread_registration_data {
        /*
@@ -177,9 +87,16 @@ struct _pthread_registration_data {
        uint32_t tsd_offset; /* copy-in */
        uint32_t return_to_kernel_offset; /* copy-in */
        uint32_t mach_thread_self_offset; /* copy-in */
+       mach_vm_address_t stack_addr_hint; /* copy-out */
        uint32_t mutex_default_policy; /* copy-out */
 } __attribute__ ((packed));
 
+/*
+ * "error" flags returned by fail condvar syscalls
+ */
+#define ECVCLEARED     0x100
+#define ECVPREPOST     0x200
+
 #ifdef KERNEL
 
 /* The set of features, from the feature bits above, that we support. */
@@ -198,23 +115,16 @@ extern pthread_callbacks_t pthread_kern;
 struct ksyn_waitq_element {
        TAILQ_ENTRY(ksyn_waitq_element) kwe_list;       /* link to other list members */
        void *          kwe_kwqqueue;                   /* queue blocked on */
-       uint32_t        kwe_state;                      /* state */
+       thread_t        kwe_thread;
+       uint16_t        kwe_state;                      /* state */
+       uint16_t        kwe_flags;
        uint32_t        kwe_lockseq;                    /* the sequence of the entry */
        uint32_t        kwe_count;                      /* upper bound on number of matches still pending */
        uint32_t        kwe_psynchretval;               /* thread retval */
        void            *kwe_uth;                       /* uthread */
-       uint64_t        kwe_tid;                        /* tid of waiter */
 };
 typedef struct ksyn_waitq_element * ksyn_waitq_element_t;
 
-pthread_priority_t thread_qos_get_pthread_priority(int qos) __attribute__((const));
-int thread_qos_get_class_index(int qos) __attribute__((const));
-int pthread_priority_get_thread_qos(pthread_priority_t priority) __attribute__((const));
-int pthread_priority_get_class_index(pthread_priority_t priority) __attribute__((const));
-pthread_priority_t class_index_get_pthread_priority(int index) __attribute__((const));
-int class_index_get_thread_qos(int index) __attribute__((const));
-int qos_class_get_class_index(int qos) __attribute__((const));
-
 #define PTH_DEFAULT_STACKSIZE 512*1024
 #define MAX_PTHREAD_SIZE 64*1024
 
@@ -276,29 +186,24 @@ extern thread_call_t psynch_thcall;
 
 struct uthread* current_uthread(void);
 
-#define WORKQ_REQTHREADS_THREADREQ   0x1
-#define WORKQ_REQTHREADS_NOEMERGENCY 0x2
-
-// Call for the kernel's kevent system to request threads.  A list of QoS/event
-// counts should be provided, sorted by flags and then QoS class.  If the
-// identity of the thread to handle the request is known, it will be returned.
-// If a new thread must be created, NULL will be returned.
-thread_t _workq_reqthreads(struct proc *p, int requests_count,
-                                                  workq_reqthreads_req_t requests);
+int
+workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr);
 
-// Resolve a pthread_priority_t to a QoS/relative pri
-integer_t _thread_qos_from_pthread_priority(unsigned long pri, unsigned long *flags);
-// Clear out extraneous flags/pri info for putting in voucher
-pthread_priority_t _pthread_priority_canonicalize(pthread_priority_t pri, boolean_t for_propagation);
+int
+workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr);
 
-boolean_t _workq_thread_has_been_unbound(thread_t th, int qos_class);
+void
+workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
+               mach_port_name_t kport, int th_qos, int setup_flags, int upcall_flags);
 
-int workq_kern_threadreq(struct proc *p, workq_threadreq_t req,
-               enum workq_threadreq_type, unsigned long priority, int flags);
+int
+workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
+               user_addr_t stackaddr, mach_port_name_t kport,
+               user_addr_t events, int nevents, int upcall_flags);
 
-int workq_kern_threadreq_modify(struct proc *p, workq_threadreq_t req,
-               enum workq_threadreq_op operation,
-               unsigned long arg1, unsigned long arg2);
+void
+workq_markfree_threadstack(proc_t p, thread_t th, vm_map_t vmap,
+               user_addr_t stackaddr);
 
 #endif // KERNEL
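
Taken together, the five new declarations hand the thread-stack lifecycle over to xnu's workqueue subsystem: the kext now only creates, wires up, recycles, and destroys stacks on the kernel's behalf. A hedged sketch of the expected calling order, from an illustrative kernel-side caller that is not part of this commit (the fifth entry point, workq_handle_stack_events, is omitted here):

	/* Hypothetical caller; shows the intended ordering of the new entry points. */
	static int
	workq_provision_thread_example(proc_t p, vm_map_t map, thread_t th,
			mach_port_name_t kport, int qos)
	{
		mach_vm_offset_t stackaddr;
		int err = workq_create_threadstack(p, map, &stackaddr); /* reserve stack + guard page */
		if (err) return err;

		/* Point the thread at its stack, pthread_t and TSD, then run it. */
		workq_setup_thread(p, th, map, (user_addr_t)stackaddr, kport, qos,
				/* setup_flags */ 0, /* upcall_flags */ 0);

		/* ... thread services work items, then parks ... */

		/* On park: let the VM reclaim the stack pages. */
		workq_markfree_threadstack(p, th, map, (user_addr_t)stackaddr);

		/* On thread death: return the whole allocation. */
		return workq_destroy_threadstack(p, map, stackaddr);
	}
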
 
diff --git a/kern/kern_policy.c b/kern/kern_policy.c
deleted file mode 100644
index 98e0c61..0000000
--- a/kern/kern_policy.c
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2013 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include "kern_internal.h"
-#include <kern/debug.h>
-#include <kern/assert.h>
-
-pthread_priority_t
-thread_qos_get_pthread_priority(int qos)
-{
-       /* Map the buckets we have in pthread_priority_t into a QoS tier. */
-       switch (qos) {
-               case THREAD_QOS_USER_INTERACTIVE: return _pthread_priority_make_newest(QOS_CLASS_USER_INTERACTIVE, 0, 0);
-               case THREAD_QOS_USER_INITIATED: return _pthread_priority_make_newest(QOS_CLASS_USER_INITIATED, 0, 0);
-               case THREAD_QOS_LEGACY: return _pthread_priority_make_newest(QOS_CLASS_DEFAULT, 0, 0);
-               case THREAD_QOS_UTILITY: return _pthread_priority_make_newest(QOS_CLASS_UTILITY, 0, 0);
-               case THREAD_QOS_BACKGROUND: return _pthread_priority_make_newest(QOS_CLASS_BACKGROUND, 0, 0);
-               case THREAD_QOS_MAINTENANCE: return _pthread_priority_make_newest(QOS_CLASS_MAINTENANCE, 0, 0);
-               default: return _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
-       }
-}
-
-int
-thread_qos_get_class_index(int qos)
-{
-    switch (qos) {
-               case THREAD_QOS_USER_INTERACTIVE: return 0;
-               case THREAD_QOS_USER_INITIATED: return 1;
-               case THREAD_QOS_LEGACY: return 2;
-               case THREAD_QOS_UTILITY: return 3;
-               case THREAD_QOS_BACKGROUND: return 4;
-               case THREAD_QOS_MAINTENANCE: return 5;
-               default: return 2;
-    }
-}
-
-int
-pthread_priority_get_thread_qos(pthread_priority_t priority)
-{
-       /* Map the buckets we have in pthread_priority_t into a QoS tier. */
-       switch (_pthread_priority_get_qos_newest(priority)) {
-               case QOS_CLASS_USER_INTERACTIVE: return THREAD_QOS_USER_INTERACTIVE;
-               case QOS_CLASS_USER_INITIATED: return THREAD_QOS_USER_INITIATED;
-               case QOS_CLASS_DEFAULT: return THREAD_QOS_LEGACY;
-               case QOS_CLASS_UTILITY: return THREAD_QOS_UTILITY;
-               case QOS_CLASS_BACKGROUND: return THREAD_QOS_BACKGROUND;
-               case QOS_CLASS_MAINTENANCE: return THREAD_QOS_MAINTENANCE;
-               default: return THREAD_QOS_UNSPECIFIED;
-       }
-}
-
-int
-pthread_priority_get_class_index(pthread_priority_t priority)
-{
-       return qos_class_get_class_index(_pthread_priority_get_qos_newest(priority));
-}
-
-pthread_priority_t
-class_index_get_pthread_priority(int index)
-{
-       qos_class_t qos;
-       switch (index) {
-               case 0: qos = QOS_CLASS_USER_INTERACTIVE; break;
-               case 1: qos = QOS_CLASS_USER_INITIATED; break;
-               case 2: qos = QOS_CLASS_DEFAULT; break;
-               case 3: qos = QOS_CLASS_UTILITY; break;
-               case 4: qos = QOS_CLASS_BACKGROUND; break;
-               case 5: qos = QOS_CLASS_MAINTENANCE; break;
-               case 6: assert(index != 6); // EVENT_MANAGER should be handled specially
-               default:
-                       /* Return the utility band if we don't understand the input. */
-                       qos = QOS_CLASS_UTILITY;
-       }
-
-       pthread_priority_t priority;
-       priority = _pthread_priority_make_newest(qos, 0, 0);
-
-       return priority;
-}
-
-int
-class_index_get_thread_qos(int class)
-{
-       int thread_qos;
-       switch (class) {
-               case 0: thread_qos = THREAD_QOS_USER_INTERACTIVE; break;
-               case 1: thread_qos = THREAD_QOS_USER_INITIATED; break;
-               case 2: thread_qos = THREAD_QOS_LEGACY; break;
-               case 3: thread_qos = THREAD_QOS_UTILITY; break;
-               case 4: thread_qos = THREAD_QOS_BACKGROUND; break;
-               case 5: thread_qos = THREAD_QOS_MAINTENANCE; break;
-               case 6: thread_qos = THREAD_QOS_LAST; break;
-               default:
-                       thread_qos = THREAD_QOS_LAST;
-       }
-       return thread_qos;
-}
-
-int
-qos_class_get_class_index(int qos)
-{
-       switch (qos){
-               case QOS_CLASS_USER_INTERACTIVE: return 0;
-               case QOS_CLASS_USER_INITIATED: return 1;
-               case QOS_CLASS_DEFAULT: return 2;
-               case QOS_CLASS_UTILITY: return 3;
-               case QOS_CLASS_BACKGROUND: return 4;
-               case QOS_CLASS_MAINTENANCE: return 5;
-               default:
-                       /* Return the default band if we don't understand the input. */
-                       return 2;
-       }
-}
-
-/**
- * Shims to help the kernel understand pthread_priority_t
- */
-
-integer_t
-_thread_qos_from_pthread_priority(unsigned long priority, unsigned long *flags)
-{
-    if (flags != NULL){
-        *flags = (int)_pthread_priority_get_flags(priority);
-    }
-    int thread_qos = pthread_priority_get_thread_qos(priority);
-    if (thread_qos == THREAD_QOS_UNSPECIFIED && flags != NULL){
-        *flags |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-    }
-    return thread_qos;
-}
-
-pthread_priority_t
-_pthread_priority_canonicalize(pthread_priority_t priority, boolean_t for_propagation)
-{
-       qos_class_t qos_class;
-       int relpri;
-       unsigned long flags = _pthread_priority_get_flags(priority);
-       _pthread_priority_split_newest(priority, qos_class, relpri);
-
-       if (for_propagation) {
-               flags = 0;
-               if (relpri > 0 || relpri < -15) relpri = 0;
-       } else {
-               if (qos_class == QOS_CLASS_UNSPECIFIED) {
-                       flags = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-               } else if (flags & (_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG|_PTHREAD_PRIORITY_SCHED_PRI_FLAG)){
-                       flags = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-                       qos_class = QOS_CLASS_UNSPECIFIED;
-               } else {
-                       flags &= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG|_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-               }
-
-               relpri = 0;
-       }
-
-       return _pthread_priority_make_newest(qos_class, relpri, flags);
-}
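
The encoding that kern_policy.c implemented is easiest to see with a worked example. Assuming the contemporaneous mask values from qos_private.h, which are not part of this diff (_PTHREAD_PRIORITY_QOS_CLASS_SHIFT == 8, _PTHREAD_PRIORITY_QOS_CLASS_MASK == 0x00ffff00, _PTHREAD_PRIORITY_PRIORITY_MASK == 0xff):

	/* Worked example of the deleted encoding (mask values assumed above). */
	pthread_priority_t p = _pthread_priority_make_newest(QOS_CLASS_UTILITY, -2, 0);
	/* cls = __PTHREAD_PRIORITY_CBIT_UTILITY            = 0x04
	 * qos = (0x04 << 8) & 0x00ffff00                   = 0x0400
	 * rel = ((uint8_t)-2 - 1) & 0xff                   = 0xfd
	 * p   = 0 | 0x0400 | 0xfd                          = 0x000004fd
	 * decoding: relpri = (int8_t)0xfd + 1              = -2  */
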
diff --git a/kern/kern_support.c b/kern/kern_support.c
index 280a18b7565eb8c47b0b1b5e47d94a568298c820..e424cceac63775df0fe150a14de1d316cd3aa847 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -32,8 +32,8 @@
 
 #pragma mark - Front Matter
 
-#define  _PTHREAD_CONDATTR_T
-#define  _PTHREAD_COND_T
+#define _PTHREAD_CONDATTR_T
+#define _PTHREAD_COND_T
 #define _PTHREAD_MUTEXATTR_T
 #define _PTHREAD_MUTEX_T
 #define _PTHREAD_RWLOCKATTR_T
@@ -105,11 +105,11 @@ extern void panic(const char *string, ...) __printflike(1,2) __dead2;
 #include <libkern/OSAtomic.h>
 #include <libkern/libkern.h>
 
-#include <sys/pthread_shims.h>
 #include "kern_internal.h"
 
-// XXX: Dirty import for sys/signarvar.h that's wrapped in BSD_KERNEL_PRIVATE
-#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))
+#ifndef WQ_SETUP_EXIT_THREAD
+#define WQ_SETUP_EXIT_THREAD    8
+#endif
 
 // XXX: Ditto for thread tags from kern/thread.h
 #define        THREAD_TAG_MAINTHREAD 0x1
@@ -120,53 +120,13 @@ lck_grp_attr_t   *pthread_lck_grp_attr;
 lck_grp_t    *pthread_lck_grp;
 lck_attr_t   *pthread_lck_attr;
 
-zone_t pthread_zone_workqueue;
-zone_t pthread_zone_threadlist;
-zone_t pthread_zone_threadreq;
-
-extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
-extern void workqueue_thread_yielded(void);
-
-#define WQ_SETUP_FIRST_USE  1
-#define WQ_SETUP_CLEAR_VOUCHER  2
-static void _setup_wqthread(proc_t p, thread_t th, struct workqueue *wq,
-               struct threadlist *tl, int flags);
-
-static void reset_priority(struct threadlist *tl, pthread_priority_t pri);
-static pthread_priority_t pthread_priority_from_wq_class_index(struct workqueue *wq, int index);
-
-static void wq_unpark_continue(void* ptr, wait_result_t wait_result) __dead2;
-
-static bool workqueue_addnewthread(proc_t p, struct workqueue *wq);
-static void workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use);
-static void workqueue_lock_spin(struct workqueue *);
-static void workqueue_unlock(struct workqueue *);
-
-#define WQ_RUN_TR_THROTTLED 0
-#define WQ_RUN_TR_THREAD_NEEDED 1
-#define WQ_RUN_TR_THREAD_STARTED 2
-#define WQ_RUN_TR_EXITING 3
-static int workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq,
-               struct threadlist *tl, struct threadreq *req, bool may_add_new_thread);
-
-static bool may_start_constrained_thread(struct workqueue *wq,
-               uint32_t at_priclass, struct threadlist *tl, bool may_start_timer);
-
-static mach_vm_offset_t stack_addr_hint(proc_t p, vm_map_t vmap);
-static boolean_t wq_thread_is_busy(uint64_t cur_ts,
-               _Atomic uint64_t *lastblocked_tsp);
-
-int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
-int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
-
-#define WQ_MAXPRI_MIN  0       /* low prio queue num */
-#define WQ_MAXPRI_MAX  2       /* max  prio queuenum */
-#define WQ_PRI_NUM     3       /* number of prio work queues */
-
 #define C_32_STK_ALIGN          16
 #define C_64_STK_ALIGN          16
 #define C_64_REDZONE_LEN        128
 
+// WORKQ use the largest alignment any platform needs
+#define C_WORKQ_STK_ALIGN       16
+
 #define PTHREAD_T_OFFSET 0
 
 /*
@@ -177,11 +137,12 @@ _________________________________________
 -----------------------------------------
 */
 
-#define PTHREAD_START_CUSTOM           0x01000000
+#define PTHREAD_START_CUSTOM           0x01000000 // <rdar://problem/34501401>
 #define PTHREAD_START_SETSCHED         0x02000000
-#define PTHREAD_START_DETACHED         0x04000000
+// was PTHREAD_START_DETACHED          0x04000000
 #define PTHREAD_START_QOSCLASS         0x08000000
 #define PTHREAD_START_TSD_BASE_SET     0x10000000
+#define PTHREAD_START_SUSPENDED                0x20000000
 #define PTHREAD_START_QOSCLASS_MASK    0x00ffffff
 #define PTHREAD_START_POLICY_BITSHIFT 16
 #define PTHREAD_START_POLICY_MASK 0xff
@@ -193,199 +154,13 @@ _________________________________________
 
 #define BASEPRI_DEFAULT 31
 
-#pragma mark sysctls
-
-static uint32_t wq_stalled_window_usecs        = WQ_STALLED_WINDOW_USECS;
-static uint32_t wq_reduce_pool_window_usecs    = WQ_REDUCE_POOL_WINDOW_USECS;
-static uint32_t wq_max_timer_interval_usecs    = WQ_MAX_TIMER_INTERVAL_USECS;
-static uint32_t wq_max_threads                 = WORKQUEUE_MAXTHREADS;
-static uint32_t wq_max_constrained_threads     = WORKQUEUE_MAXTHREADS / 8;
-static uint32_t wq_max_concurrency[WORKQUEUE_NUM_BUCKETS + 1]; // set to ncpus on load
-
-SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
-          &wq_stalled_window_usecs, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
-          &wq_reduce_pool_window_usecs, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
-          &wq_max_timer_interval_usecs, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
-          &wq_max_threads, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
-          &wq_max_constrained_threads, 0, "");
-
-#ifdef DEBUG
-static int wq_kevent_test SYSCTL_HANDLER_ARGS;
-SYSCTL_PROC(_debug, OID_AUTO, wq_kevent_test, CTLFLAG_MASKED | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLTYPE_OPAQUE, NULL, 0, wq_kevent_test, 0, "-");
-#endif
-
-static uint32_t wq_init_constrained_limit = 1;
-
 uint32_t pthread_debug_tracing = 1;
 
-SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED,
-                  &pthread_debug_tracing, 0, "")
-
 static uint32_t pthread_mutex_default_policy;
 
 SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED,
           &pthread_mutex_default_policy, 0, "");
 
-/*
- *       +-----+-----+-----+-----+-----+-----+-----+
- *       | MT  | BG  | UT  | DE  | IN  | UN  | mgr |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * | pri |  5  |  4  |  3  |  2  |  1  |  0  |  6  |
- * | qos |  1  |  2  |  3  |  4  |  5  |  6  |  7  |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- */
-static inline uint32_t
-_wq_bucket_to_thread_qos(int pri)
-{
-       if (pri == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-               return WORKQUEUE_EVENT_MANAGER_BUCKET + 1;
-       }
-       return WORKQUEUE_EVENT_MANAGER_BUCKET - pri;
-}
-
-#pragma mark wq_thactive
-
-#if defined(__LP64__)
-// Layout is:
-//   7 * 16 bits for each QoS bucket request count (including manager)
-//   3 bits of best QoS among all pending constrained requests
-//   13 bits of zeroes
-#define WQ_THACTIVE_BUCKET_WIDTH 16
-#define WQ_THACTIVE_QOS_SHIFT    (7 * WQ_THACTIVE_BUCKET_WIDTH)
-#else
-// Layout is:
-//   6 * 10 bits for each QoS bucket request count (except manager)
-//   1 bit for the manager bucket
-//   3 bits of best QoS among all pending constrained requests
-#define WQ_THACTIVE_BUCKET_WIDTH 10
-#define WQ_THACTIVE_QOS_SHIFT    (6 * WQ_THACTIVE_BUCKET_WIDTH + 1)
-#endif
-#define WQ_THACTIVE_BUCKET_MASK  ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1)
-#define WQ_THACTIVE_BUCKET_HALF  (1U << (WQ_THACTIVE_BUCKET_WIDTH - 1))
-#define WQ_THACTIVE_NO_PENDING_REQUEST 6
-
-_Static_assert(sizeof(wq_thactive_t) * CHAR_BIT - WQ_THACTIVE_QOS_SHIFT >= 3,
-               "Make sure we have space to encode a QoS");
-
-static inline wq_thactive_t
-_wq_thactive_fetch_and_add(struct workqueue *wq, wq_thactive_t offset)
-{
-#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__)
-       return atomic_fetch_add_explicit(&wq->wq_thactive, offset,
-                       memory_order_relaxed);
-#else
-       return pthread_kern->atomic_fetch_add_128_relaxed(&wq->wq_thactive, offset);
-#endif
-}
-
-static inline wq_thactive_t
-_wq_thactive(struct workqueue *wq)
-{
-#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__)
-       return atomic_load_explicit(&wq->wq_thactive, memory_order_relaxed);
-#else
-       return pthread_kern->atomic_load_128_relaxed(&wq->wq_thactive);
-#endif
-}
-
-#define WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(tha) \
-               ((tha) >> WQ_THACTIVE_QOS_SHIFT)
-
-static inline uint32_t
-_wq_thactive_best_constrained_req_qos(struct workqueue *wq)
-{
-       // Avoid expensive atomic operations: the three bits we're loading are in
-       // a single byte, and always updated under the workqueue lock
-       wq_thactive_t v = *(wq_thactive_t *)&wq->wq_thactive;
-       return WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(v);
-}
-
-static inline wq_thactive_t
-_wq_thactive_set_best_constrained_req_qos(struct workqueue *wq,
-               uint32_t orig_qos, uint32_t new_qos)
-{
-       wq_thactive_t v;
-       v = (wq_thactive_t)(new_qos - orig_qos) << WQ_THACTIVE_QOS_SHIFT;
-       /*
-        * We can do an atomic add relative to the initial load because updates
-        * to this qos are always serialized under the workqueue lock.
-        */
-       return _wq_thactive_fetch_and_add(wq, v) + v;
-}
-
-static inline wq_thactive_t
-_wq_thactive_offset_for_qos(int qos)
-{
-       return (wq_thactive_t)1 << (qos * WQ_THACTIVE_BUCKET_WIDTH);
-}
-
-static inline wq_thactive_t
-_wq_thactive_inc(struct workqueue *wq, int qos)
-{
-       return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(qos));
-}
-
-static inline wq_thactive_t
-_wq_thactive_dec(struct workqueue *wq, int qos)
-{
-       return _wq_thactive_fetch_and_add(wq, -_wq_thactive_offset_for_qos(qos));
-}
-
-static inline wq_thactive_t
-_wq_thactive_move(struct workqueue *wq, int oldqos, int newqos)
-{
-       return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(newqos) -
-                       _wq_thactive_offset_for_qos(oldqos));
-}
-
-static inline uint32_t
-_wq_thactive_aggregate_downto_qos(struct workqueue *wq, wq_thactive_t v,
-               int qos, uint32_t *busycount, uint32_t *max_busycount)
-{
-       uint32_t count = 0, active;
-       uint64_t curtime;
-
-#ifndef __LP64__
-       /*
-        * on 32bits the manager bucket is a single bit and the best constrained
-        * request QoS 3 bits are where the 10 bits of a regular QoS bucket count
-        * would be. Mask them out.
-        */
-       v &= ~(~0ull << WQ_THACTIVE_QOS_SHIFT);
-#endif
-       if (busycount) {
-               curtime = mach_absolute_time();
-               *busycount = 0;
-       }
-       if (max_busycount) {
-               *max_busycount = qos + 1;
-       }
-       for (int i = 0; i <= qos; i++, v >>= WQ_THACTIVE_BUCKET_WIDTH) {
-               active = v & WQ_THACTIVE_BUCKET_MASK;
-               count += active;
-               if (busycount && wq->wq_thscheduled_count[i] > active) {
-                       if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i])) {
-                               /*
-                                * We only consider the last blocked thread for a given bucket
-                                * as busy because we don't want to take the list lock in each
-                                * sched callback. However this is an approximation that could
-                                * contribute to thread creation storms.
-                                */
-                               (*busycount)++;
-                       }
-               }
-       }
-       return count;
-}
-
 #pragma mark - Process/Thread Setup/Teardown syscalls
 
 static mach_vm_offset_t
@@ -445,41 +220,45 @@ stack_addr_hint(proc_t p, vm_map_t vmap)
        return stackaddr;
 }
 
+static bool
+_pthread_priority_to_policy(pthread_priority_t priority,
+               thread_qos_policy_data_t *data)
+{
+       data->qos_tier = _pthread_priority_thread_qos(priority);
+       data->tier_importance = _pthread_priority_relpri(priority);
+       if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 ||
+                       data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
+               return false;
+       }
+       return true;
+}
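
Note that _pthread_priority_to_policy() is a validating conversion: unlike the deleted mapping helpers, which silently mapped unknown inputs to a default band, it refuses unspecified QoS and out-of-range relative priorities so callers can fail with EINVAL. A sketch, assuming xnu's THREAD_QOS_MIN_TIER_IMPORTANCE of -15 and the _pthread_priority_make_from_thread_qos() helper from the new priority_private.h:

	thread_qos_policy_data_t qos;
	/* relpri must lie in [-15, 0]; positive or unspecified inputs fail. */
	bool ok = _pthread_priority_to_policy(
			_pthread_priority_make_from_thread_qos(THREAD_QOS_UTILITY, -2, 0),
			&qos);
	/* ok == true, qos.qos_tier == THREAD_QOS_UTILITY, tier_importance == -2 */
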
+
 /**
  * bsdthread_create system call.  Used by pthread_create.
  */
 int
-_bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval)
+_bsdthread_create(struct proc *p,
+               __unused user_addr_t user_func, __unused user_addr_t user_funcarg,
+               user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags,
+               user_addr_t *retval)
 {
        kern_return_t kret;
        void * sright;
        int error = 0;
-       int allocated = 0;
-       mach_vm_offset_t stackaddr;
-       mach_vm_size_t th_allocsize = 0;
-       mach_vm_size_t th_guardsize;
-       mach_vm_offset_t th_stack;
-       mach_vm_offset_t th_pthread;
        mach_vm_offset_t th_tsd_base;
        mach_port_name_t th_thport;
        thread_t th;
-       vm_map_t vmap = pthread_kern->current_map();
        task_t ctask = current_task();
        unsigned int policy, importance;
        uint32_t tsd_offset;
-
-       int isLP64 = 0;
+       bool start_suspended = (flags & PTHREAD_START_SUSPENDED);
 
        if (pthread_kern->proc_get_register(p) == 0) {
                return EINVAL;
        }
 
-       PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0);
-
-       isLP64 = proc_is64bit(p);
-       th_guardsize = vm_map_page_size(vmap);
+       PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0);
 
-       stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
        kret = pthread_kern->thread_create(ctask, &th);
        if (kret != KERN_SUCCESS)
                return(ENOMEM);
@@ -495,152 +274,64 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar
        }
 
        if ((flags & PTHREAD_START_CUSTOM) == 0) {
-               mach_vm_size_t pthread_size =
-                       vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(vmap));
-               th_allocsize = th_guardsize + user_stack + pthread_size;
-               user_stack += PTHREAD_T_OFFSET;
-
-               kret = mach_vm_map(vmap, &stackaddr,
-                               th_allocsize,
-                               page_size-1,
-                               VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
-                               0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
-                               VM_INHERIT_DEFAULT);
-               if (kret != KERN_SUCCESS){
-                       kret = mach_vm_allocate(vmap,
-                                       &stackaddr, th_allocsize,
-                                       VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
-               }
-               if (kret != KERN_SUCCESS) {
-                       error = ENOMEM;
-                       goto out;
-               }
-
-               PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);
-
-               allocated = 1;
-               /*
-                * The guard page is at the lowest address
-                * The stack base is the highest address
-                */
-               kret = mach_vm_protect(vmap,  stackaddr, th_guardsize, FALSE, VM_PROT_NONE);
-
-               if (kret != KERN_SUCCESS) {
-                       error = ENOMEM;
-                       goto out1;
-               }
-
-               th_pthread = stackaddr + th_guardsize + user_stack;
-               th_stack = th_pthread;
-
-               /*
-               * Pre-fault the first page of the new thread's stack and the page that will
-               * contain the pthread_t structure.
-               */
-               if (vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
-                               vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap))){
-                       vm_fault( vmap,
-                                       vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
-                                       VM_PROT_READ | VM_PROT_WRITE,
-                                       FALSE,
-                                       THREAD_UNINT, NULL, 0);
-               }
-
-               vm_fault( vmap,
-                               vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap)),
-                               VM_PROT_READ | VM_PROT_WRITE,
-                               FALSE,
-                               THREAD_UNINT, NULL, 0);
-
-       } else {
-               th_stack = user_stack;
-               th_pthread = user_pthread;
-
-               PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0);
+               error = EINVAL;
+               goto out;
        }
 
+       PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3);
+
        tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
        if (tsd_offset) {
-               th_tsd_base = th_pthread + tsd_offset;
+               th_tsd_base = user_pthread + tsd_offset;
                kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
                if (kret == KERN_SUCCESS) {
                        flags |= PTHREAD_START_TSD_BASE_SET;
                }
        }
+       /*
+        * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel
+        * supports this flag (after the fact).
+        */
+       flags &= ~PTHREAD_START_SUSPENDED;
 
-#if defined(__i386__) || defined(__x86_64__)
        /*
-        * Set up i386 registers & function call.
+        * Set up registers & function call.
         */
-       if (isLP64 == 0) {
-               x86_thread_state32_t state = {
-                       .eip = (unsigned int)pthread_kern->proc_get_threadstart(p),
-                       .eax = (unsigned int)th_pthread,
-                       .ebx = (unsigned int)th_thport,
-                       .ecx = (unsigned int)user_func,
-                       .edx = (unsigned int)user_funcarg,
-                       .edi = (unsigned int)user_stack,
-                       .esi = (unsigned int)flags,
-                       /*
-                        * set stack pointer
-                        */
-                       .esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN))
+#if defined(__i386__) || defined(__x86_64__)
+       if (proc_is64bit_data(p)) {
+               x86_thread_state64_t state = {
+                       .rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
+                       .rdi = (uint64_t)user_pthread,
+                       .rsi = (uint64_t)th_thport,
+                       .rdx = (uint64_t)user_func,    /* golang wants this */
+                       .rcx = (uint64_t)user_funcarg, /* golang wants this */
+                       .r8  = (uint64_t)user_stack,   /* golang wants this */
+                       .r9  = (uint64_t)flags,
+
+                       .rsp = (uint64_t)(user_stack - C_64_REDZONE_LEN)
                };
 
-               error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
-               if (error != KERN_SUCCESS) {
-                       error = EINVAL;
-                       goto out;
-               }
+               (void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
        } else {
-               x86_thread_state64_t state64 = {
-                       .rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
-                       .rdi = (uint64_t)th_pthread,
-                       .rsi = (uint64_t)(th_thport),
-                       .rdx = (uint64_t)user_func,
-                       .rcx = (uint64_t)user_funcarg,
-                       .r8 = (uint64_t)user_stack,
-                       .r9 = (uint64_t)flags,
-                       /*
-                        * set stack pointer aligned to 16 byte boundary
-                        */
-                       .rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN)
+               x86_thread_state32_t state = {
+                       .eip = (uint32_t)pthread_kern->proc_get_threadstart(p),
+                       .eax = (uint32_t)user_pthread,
+                       .ebx = (uint32_t)th_thport,
+                       .ecx = (uint32_t)user_func,    /* golang wants this */
+                       .edx = (uint32_t)user_funcarg, /* golang wants this */
+                       .edi = (uint32_t)user_stack,   /* golang wants this */
+                       .esi = (uint32_t)flags,
+
+                       .esp = (int)((vm_offset_t)(user_stack - C_32_STK_ALIGN))
                };
 
-               error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
-               if (error != KERN_SUCCESS) {
-                       error = EINVAL;
-                       goto out;
-               }
-
+               (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
        }
-#elif defined(__arm__)
-       arm_thread_state_t state = {
-               .pc = (int)pthread_kern->proc_get_threadstart(p),
-               .r[0] = (unsigned int)th_pthread,
-               .r[1] = (unsigned int)th_thport,
-               .r[2] = (unsigned int)user_func,
-               .r[3] = (unsigned int)user_funcarg,
-               .r[4] = (unsigned int)user_stack,
-               .r[5] = (unsigned int)flags,
-
-               /* Set r7 & lr to 0 for better back tracing */
-               .r[7] = 0,
-               .lr = 0,
-
-               /*
-                * set stack pointer
-                */
-               .sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN))
-       };
-
-       (void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
-
 #else
 #error bsdthread_create  not defined for this architecture
 #endif
 
-       if ((flags & PTHREAD_START_SETSCHED) != 0) {
+       if (flags & PTHREAD_START_SETSCHED) {
                /* Set scheduling parameters if needed */
                thread_extended_policy_data_t    extinfo;
                thread_precedence_policy_data_t   precedinfo;
@@ -658,16 +349,16 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar
 
                precedinfo.importance = (importance - BASEPRI_DEFAULT);
                thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
-       } else if ((flags & PTHREAD_START_QOSCLASS) != 0) {
+       } else if (flags & PTHREAD_START_QOSCLASS) {
                /* Set thread QoS class if requested. */
-               pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK);
-
                thread_qos_policy_data_t qos;
-               qos.qos_tier = pthread_priority_get_thread_qos(priority);
-               qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 :
-                               _pthread_priority_get_relpri(priority);
 
-               pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
+               if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) {
+                       error = EINVAL;
+                       goto out;
+               }
+               pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY,
+                               (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
        }
 
        if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
@@ -677,37 +368,33 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar
                        bool proc64bit = proc_is64bit(p);
                        if (proc64bit) {
                                uint64_t th_thport_tsd = (uint64_t)th_thport;
-                               error = copyout(&th_thport_tsd, th_pthread + tsd_offset +
+                               error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
                                                mach_thread_self_offset, sizeof(th_thport_tsd));
                        } else {
                                uint32_t th_thport_tsd = (uint32_t)th_thport;
-                               error = copyout(&th_thport_tsd, th_pthread + tsd_offset +
+                               error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
                                                mach_thread_self_offset, sizeof(th_thport_tsd));
                        }
                        if (error) {
-                               goto out1;
+                               goto out;
                        }
                }
        }
 
-       kret = pthread_kern->thread_resume(th);
-       if (kret != KERN_SUCCESS) {
-               error = EINVAL;
-               goto out1;
+       if (!start_suspended) {
+               kret = pthread_kern->thread_resume(th);
+               if (kret != KERN_SUCCESS) {
+                       error = EINVAL;
+                       goto out;
+               }
        }
        thread_deallocate(th);  /* drop the creator reference */
 
-       PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0);
-
-       // cast required as mach_vm_offset_t is always 64 bits even on 32-bit platforms
-       *retval = (user_addr_t)th_pthread;
+       PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_END, error, user_pthread, 0, 0);
 
+       *retval = user_pthread;
        return(0);
 
-out1:
-       if (allocated != 0) {
-               (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
-       }
 out:
        (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
        if (pthread_kern->thread_will_park_or_terminate) {
@@ -737,21 +424,24 @@ _bsdthread_terminate(__unused struct proc *p,
        freeaddr = (mach_vm_offset_t)stackaddr;
        freesize = size;
 
-       PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);
+       PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff);
 
        if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
                if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){
                        vm_map_t user_map = pthread_kern->current_map();
                        freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
                        kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
-                       assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
+#if MACH_ASSERT
+                       if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) {
+                               os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret);
+                       }
+#endif
                        kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
                        assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
                } else {
                        kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
                        if (kret != KERN_SUCCESS) {
-                               PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
-                               return(EINVAL);
+                               PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
                        }
                }
        }
@@ -761,10 +451,9 @@ _bsdthread_terminate(__unused struct proc *p,
        }
        (void)thread_terminate(th);
        if (sem != MACH_PORT_NULL) {
-                kret = pthread_kern->semaphore_signal_internal_trap(sem);
+               kret = pthread_kern->semaphore_signal_internal_trap(sem);
                if (kret != KERN_SUCCESS) {
-                       PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
-                       return(EINVAL);
+                       PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
                }
        }
 
@@ -772,14 +461,10 @@ _bsdthread_terminate(__unused struct proc *p,
                pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
        }
 
-       PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0);
+       PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0);
 
        pthread_kern->thread_exception_return();
-       panic("bsdthread_terminate: still running\n");
-
-       PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0);
-
-       return(0);
+       __builtin_unreachable();
 }
 
 /**
@@ -873,29 +558,35 @@ _bsdthread_register(struct proc *p,
        if (pthread_init_data != 0) {
                /* Outgoing data that userspace expects as a reply */
                data.version = sizeof(struct _pthread_registration_data);
+               data.main_qos = _pthread_unspecified_priority();
+
                if (pthread_kern->qos_main_thread_active()) {
                        mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
                        thread_qos_policy_data_t qos;
                        boolean_t gd = FALSE;
 
-                       kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
+                       kr = pthread_kern->thread_policy_get(current_thread(),
+                                       THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
                        if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
-                               /* Unspecified threads means the kernel wants us to impose legacy upon the thread. */
+                               /*
+                                * Unspecified threads means the kernel wants us
+                                * to impose legacy upon the thread.
+                                */
                                qos.qos_tier = THREAD_QOS_LEGACY;
                                qos.tier_importance = 0;
 
-                               kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
+                               kr = pthread_kern->thread_policy_set_internal(current_thread(),
+                                               THREAD_QOS_POLICY, (thread_policy_t)&qos,
+                                               THREAD_QOS_POLICY_COUNT);
                        }
 
                        if (kr == KERN_SUCCESS) {
-                               data.main_qos = thread_qos_get_pthread_priority(qos.qos_tier);
-                       } else {
-                               data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
+                               data.main_qos = _pthread_priority_make_from_thread_qos(
+                                               qos.qos_tier, 0, 0);
                        }
-               } else {
-                       data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
                }
 
+               data.stack_addr_hint = stackaddr;
                data.mutex_default_policy = pthread_mutex_default_policy;
 
                kr = copyout(&data, pthread_init_data, pthread_init_sz);
@@ -910,2858 +601,220 @@ _bsdthread_register(struct proc *p,
        return(0);
 }
 
-#pragma mark - QoS Manipulation
+
+#pragma mark - Workqueue Thread Support
+
+static mach_vm_size_t
+workq_thread_allocsize(proc_t p, vm_map_t wq_map,
+               mach_vm_size_t *guardsize_out)
+{
+       mach_vm_size_t guardsize = vm_map_page_size(wq_map);
+       mach_vm_size_t pthread_size = vm_map_round_page_mask(
+                       pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET,
+                       vm_map_page_mask(wq_map));
+       if (guardsize_out) *guardsize_out = guardsize;
+       return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
+}
 
 int
-_bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval)
+workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr)
 {
-       int rv;
-       thread_t th;
+       mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
+       mach_vm_size_t guardsize, th_allocsize;
+       kern_return_t kret;
 
-       pthread_priority_t priority;
+       th_allocsize = workq_thread_allocsize(p, vmap, &guardsize);
+       kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1,
+                       VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE,
+                       VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 
-       /* Unused parameters must be zero. */
-       if (arg3 != 0) {
-               return EINVAL;
+       if (kret != KERN_SUCCESS) {
+               kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize,
+                               VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
        }
 
-       /* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */
-       if (proc_is64bit(p)) {
-               uint64_t v;
-               rv = copyin(tsd_priority_addr, &v, sizeof(v));
-               if (rv) goto out;
-               priority = (int)(v & 0xffffffff);
-       } else {
-               uint32_t v;
-               rv = copyin(tsd_priority_addr, &v, sizeof(v));
-               if (rv) goto out;
-               priority = v;
+       if (kret != KERN_SUCCESS) {
+               goto fail;
        }
 
-       if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
-               return ESRCH;
+       /*
+        * The guard page is at the lowest address
+        * The stack base is the highest address
+        */
+       kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE);
+       if (kret != KERN_SUCCESS) {
+               goto fail_vm_deallocate;
        }
 
-       /* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */
-       if (th != current_thread()) {
-               thread_deallocate(th);
-               return EPERM;
+       if (out_addr) {
+               *out_addr = stackaddr;
        }
+       return 0;
 
-       rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval);
-
-       /* Static param the thread, we just set QoS on it, so its stuck in QoS land now. */
-       /* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744>, for details
-
-       thread_deallocate(th);
-
-out:
-       return rv;
+fail_vm_deallocate:
+       (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
+fail:
+       return kret;
 }
 
-static inline struct threadlist *
-util_get_thread_threadlist_entry(thread_t th)
+int
+workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr)
 {
-       struct uthread *uth = pthread_kern->get_bsdthread_info(th);
-       if (uth) {
-               struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
-               return tl;
-       }
-       return NULL;
+       return mach_vm_deallocate(vmap, stackaddr,
+                       workq_thread_allocsize(p, vmap, NULL));
 }
 
-boolean_t
-_workq_thread_has_been_unbound(thread_t th, int qos_class)
-{
-       struct threadlist *tl = util_get_thread_threadlist_entry(th);
-       if (!tl) {
-               return FALSE;
+void
+workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th,
+               vm_map_t vmap, user_addr_t stackaddr)
+{
+       // Keep this in sync with workq_setup_thread()
+       const vm_size_t       guardsize = vm_map_page_size(vmap);
+       const user_addr_t     freeaddr = (user_addr_t)stackaddr + guardsize;
+       const vm_map_offset_t freesize = vm_map_trunc_page_mask(
+                       (PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1,
+                       vm_map_page_mask(vmap)) - guardsize;
+
+       __assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr,
+                       freesize, VM_BEHAVIOR_REUSABLE);
+#if MACH_ASSERT
+       if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
+               os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
        }
+#endif
+}
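+
+/*
+ * Note (illustrative, not from the original sources): VM_BEHAVIOR_REUSABLE
+ * tells the VM layer the page contents are no longer needed, so they can be
+ * reclaimed under memory pressure without being written out, while the
+ * mapping itself stays in place for the next thread to reuse.  A rough
+ * userspace analogue would be:
+ *
+ *     madvise(stack_bottom, stack_size, MADV_FREE_REUSABLE);
+ */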
 
-       struct workqueue *wq = tl->th_workq;
-       workqueue_lock_spin(wq);
-
-       if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-               goto failure;
-       } else if (qos_class != class_index_get_thread_qos(tl->th_priority)) {
-               goto failure;
-       }
+struct workq_thread_addrs {
+       user_addr_t self;
+       user_addr_t stack_bottom;
+       user_addr_t stack_top;
+};
 
-       if ((tl->th_flags & TH_LIST_KEVENT_BOUND)){
-               goto failure;
-       }
-       tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
+static inline void
+workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr)
+{
+       th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN);
+}
 
-       workqueue_unlock(wq);
-       return TRUE;
+static void
+workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr,
+                                          struct workq_thread_addrs *th_addrs)
+{
+       const vm_size_t guardsize = vm_map_page_size(map);
 
-failure:
-       workqueue_unlock(wq);
-       return FALSE;
+       th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE +
+                       guardsize + PTHREAD_T_OFFSET);
+       workq_thread_set_top_addr(th_addrs, th_addrs->self);
+       th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize);
 }
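+
+/*
+ * Resulting layout of a workqueue thread allocation, low to high address
+ * (illustrative summary of the arithmetic above):
+ *
+ *   | guard page | thread stack (grows down from stack_top) | pthread_t+TSD |
+ *   ^ stackaddr  ^ stack_bottom                   stack_top ^ ^ self
+ *
+ * stack_top is self rounded down to C_WORKQ_STK_ALIGN; workq_kevent() below
+ * carves the kevent list, kevent ID, and data buffer out of the space just
+ * under self.
+ */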
 
-int
-_bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval)
+static inline void
+workq_set_register_state(proc_t p, thread_t th,
+               struct workq_thread_addrs *addrs, mach_port_name_t kport,
+               user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count)
 {
-       thread_qos_policy_data_t qos;
-       mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
-       boolean_t gd = FALSE;
-       thread_t th = current_thread();
-       struct workqueue *wq = NULL;
-       struct threadlist *tl = NULL;
+       user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
+       if (!wqstart_fnptr) {
+               panic("workqueue thread start function pointer is NULL");
+       }
 
-       kern_return_t kr;
-       int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0;
+#if defined(__i386__) || defined(__x86_64__)
+       if (proc_is64bit_data(p) == 0) {
+               x86_thread_state32_t state = {
+                       .eip = (unsigned int)wqstart_fnptr,
+                       .eax = /* arg0 */ (unsigned int)addrs->self,
+                       .ebx = /* arg1 */ (unsigned int)kport,
+                       .ecx = /* arg2 */ (unsigned int)addrs->stack_bottom,
+                       .edx = /* arg3 */ (unsigned int)kevent_list,
+                       .edi = /* arg4 */ (unsigned int)upcall_flags,
+                       .esi = /* arg5 */ (unsigned int)kevent_count,
 
-       if ((flags & _PTHREAD_SET_SELF_WQ_KEVENT_UNBIND) != 0) {
-               tl = util_get_thread_threadlist_entry(th);
-               if (tl) {
-                       wq = tl->th_workq;
-               } else {
-                       goto qos;
+                       .esp = (int)((vm_offset_t)addrs->stack_top),
+               };
+
+               int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
+               if (error != KERN_SUCCESS) {
+                       panic("%s: thread_set_wq_state failed: %d", __func__, error);
                }
+       } else {
+               x86_thread_state64_t state64 = {
+                       // x86-64 already passes all the arguments in registers, so we just put them in their final place here
+                       .rip = (uint64_t)wqstart_fnptr,
+                       .rdi = (uint64_t)addrs->self,
+                       .rsi = (uint64_t)kport,
+                       .rdx = (uint64_t)addrs->stack_bottom,
+                       .rcx = (uint64_t)kevent_list,
+                       .r8  = (uint64_t)upcall_flags,
+                       .r9  = (uint64_t)kevent_count,
 
-               workqueue_lock_spin(wq);
-               if (tl->th_flags & TH_LIST_KEVENT_BOUND) {
-                       tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
-                       unsigned int kevent_flags = KEVENT_FLAG_WORKQ | KEVENT_FLAG_UNBIND_CHECK_FLAGS;
-                       if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-                               kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER;
-                       }
+                       .rsp = (uint64_t)(addrs->stack_top)
+               };
 
-                       workqueue_unlock(wq);
-                       __assert_only int ret = kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags);
-                       assert(ret == 0);
-               } else {
-                       workqueue_unlock(wq);
+               int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
+               if (error != KERN_SUCCESS) {
+                       panic("%s: thread_set_wq_state failed: %d", __func__, error);
                }
        }
+#else
+#error workq_set_register_state not defined for this architecture
+#endif
+}
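+
+/*
+ * The six argument registers set above correspond to the calling convention
+ * of the userspace entry point returned by proc_get_wqthread().  In
+ * libpthread that entry point has roughly this shape (prototype shown for
+ * illustration; the exact parameter names are assumptions):
+ *
+ *     void _pthread_wqthread(pthread_t self, mach_port_t kport,
+ *                     void *stack_bottom, void *keventlist,
+ *                     int upcall_flags, int nkevents);
+ */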
 
-qos:
-       if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
-               kr = pthread_kern->thread_policy_get(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
-               if (kr != KERN_SUCCESS) {
-                       qos_rv = EINVAL;
-                       goto voucher;
-               }
+static int
+workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs, int upcall_flags,
+               user_addr_t eventlist, int nevents, int kevent_flags,
+               user_addr_t *kevent_list_out, int *kevent_count_out)
+{
+       bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP;
+       int kevent_count = WQ_KEVENT_LIST_LEN;
+       user_addr_t kevent_list = th_addrs->self - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
+       user_addr_t kevent_id_addr = kevent_list;
+       kqueue_id_t kevent_id = -1;
+       int ret;
 
+       if (workloop) {
                /*
-                * If we have main-thread QoS then we don't allow a thread to come out
-                * of QOS_CLASS_UNSPECIFIED.
+                * The kevent ID goes just below the kevent list.  Sufficiently new
+                * userspace will know to look there.  Old userspace will just
+                * ignore it.
                 */
-               if (pthread_kern->qos_main_thread_active() && qos.qos_tier ==
-                               THREAD_QOS_UNSPECIFIED) {
-                       qos_rv = EPERM;
-                       goto voucher;
-               }
+               kevent_id_addr -= sizeof(kqueue_id_t);
+       }
 
-               if (!tl) {
-                       tl = util_get_thread_threadlist_entry(th);
-                       if (tl) wq = tl->th_workq;
-               }
+       user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE;
+       user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE;
 
-               PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0);
+       if (workloop) {
+               kevent_flags |= KEVENT_FLAG_WORKLOOP;
+               ret = kevent_id_internal(p, &kevent_id,
+                               eventlist, nevents, kevent_list, kevent_count,
+                               kevent_data_buf, &kevent_data_available,
+                               kevent_flags, &kevent_count);
+               copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id));
+       } else {
+               kevent_flags |= KEVENT_FLAG_WORKQ;
+               ret = kevent_qos_internal(p, -1, eventlist, nevents, kevent_list,
+                               kevent_count, kevent_data_buf, &kevent_data_available,
+                               kevent_flags, &kevent_count);
+       }
 
-               qos.qos_tier = pthread_priority_get_thread_qos(priority);
-               qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority);
+       // squash any errors into just empty output
+       if (ret != 0 || kevent_count == -1) {
+               *kevent_list_out = 0;
+               *kevent_count_out = 0;
+               return ret;
+       }
 
-               if (qos.qos_tier == QOS_CLASS_UNSPECIFIED ||
-                               qos.tier_importance > 0 || qos.tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
-                       qos_rv = EINVAL;
-                       goto voucher;
-               }
-
-               /*
-                * If we're a workqueue, the threadlist item priority needs adjusting,
-                * along with the bucket we were running in.
-                */
-               if (tl) {
-                       bool try_run_threadreq = false;
-
-                       workqueue_lock_spin(wq);
-                       kr = pthread_kern->thread_set_workq_qos(th, qos.qos_tier, qos.tier_importance);
-                       assert(kr == KERN_SUCCESS || kr == KERN_TERMINATED);
-
-                       /* Fix up counters. */
-                       uint8_t old_bucket = tl->th_priority;
-                       uint8_t new_bucket = pthread_priority_get_class_index(priority);
-
-                       if (old_bucket != new_bucket) {
-                               _wq_thactive_move(wq, old_bucket, new_bucket);
-                               wq->wq_thscheduled_count[old_bucket]--;
-                               wq->wq_thscheduled_count[new_bucket]++;
-                               if (old_bucket == WORKQUEUE_EVENT_MANAGER_BUCKET ||
-                                               old_bucket < new_bucket) {
-                                       /*
-                                        * if the QoS of the thread was lowered, then this could
-                                        * allow for a higher QoS thread request to run, so we need
-                                        * to reevaluate.
-                                        */
-                                       try_run_threadreq = true;
-                               }
-                               tl->th_priority = new_bucket;
-                       }
-
-                       bool old_overcommit = !(tl->th_flags & TH_LIST_CONSTRAINED);
-                       bool new_overcommit = priority & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
-                       if (!old_overcommit && new_overcommit) {
-                               if (wq->wq_constrained_threads_scheduled-- ==
-                                               wq_max_constrained_threads) {
-                                       try_run_threadreq = true;
-                               }
-                               tl->th_flags &= ~TH_LIST_CONSTRAINED;
-                       } else if (old_overcommit && !new_overcommit) {
-                               wq->wq_constrained_threads_scheduled++;
-                               tl->th_flags |= TH_LIST_CONSTRAINED;
-                       }
-
-                       if (try_run_threadreq) {
-                               workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
-                       } else {
-                               workqueue_unlock(wq);
-                       }
-               } else {
-                       kr = pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
-                       if (kr != KERN_SUCCESS) {
-                               qos_rv = EINVAL;
-                       }
-               }
-
-               PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0);
-       }
-
-voucher:
-       if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) {
-               kr = pthread_kern->thread_set_voucher_name(voucher);
-               if (kr != KERN_SUCCESS) {
-                       voucher_rv = ENOENT;
-                       goto fixedpri;
-               }
-       }
-
-fixedpri:
-       if (qos_rv) goto done;
-       if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) {
-               thread_extended_policy_data_t extpol = {.timeshare = 0};
-
-               if (!tl) tl  = util_get_thread_threadlist_entry(th);
-               if (tl) {
-                       /* Not allowed on workqueue threads */
-                       fixedpri_rv = ENOTSUP;
-                       goto done;
-               }
-
-               kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
-               if (kr != KERN_SUCCESS) {
-                       fixedpri_rv = EINVAL;
-                       goto done;
-               }
-       } else if ((flags & _PTHREAD_SET_SELF_TIMESHARE_FLAG) != 0) {
-               thread_extended_policy_data_t extpol = {.timeshare = 1};
-
-               if (!tl) tl = util_get_thread_threadlist_entry(th);
-               if (tl) {
-                       /* Not allowed on workqueue threads */
-                       fixedpri_rv = ENOTSUP;
-                       goto done;
-               }
-
-               kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
-               if (kr != KERN_SUCCESS) {
-                       fixedpri_rv = EINVAL;
-                       goto done;
-               }
-       }
-
-done:
-       if (qos_rv && voucher_rv) {
-               /* Both failed, give that a unique error. */
-               return EBADMSG;
-       }
-
-       if (qos_rv) {
-               return qos_rv;
-       }
-
-       if (voucher_rv) {
-               return voucher_rv;
-       }
-
-       if (fixedpri_rv) {
-               return fixedpri_rv;
-       }
-
-       return 0;
-}
-
-int
-_bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
-{
-       thread_t th;
-       int rv = 0;
-
-       if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
-               return ESRCH;
-       }
-
-       int override_qos = pthread_priority_get_thread_qos(priority);
-
-       struct threadlist *tl = util_get_thread_threadlist_entry(th);
-       if (tl) {
-               PTHREAD_TRACE_WQ(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);
-       }
-
-       /* The only failure case is when a tid is passed in and the thread must be looked up; we pass the thread itself, so this always succeeds. */
-       pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE,
-                       resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE, USER_ADDR_NULL, MACH_PORT_NULL);
-       thread_deallocate(th);
-       return rv;
-}
-
-int
-_bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t resource, user_addr_t arg3, int __unused *retval)
-{
-       thread_t th;
-       int rv = 0;
-
-       if (arg3 != 0) {
-               return EINVAL;
-       }
-
-       if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
-               return ESRCH;
-       }
-
-       struct uthread *uth = pthread_kern->get_bsdthread_info(th);
-
-       struct threadlist *tl = util_get_thread_threadlist_entry(th);
-       if (tl) {
-               PTHREAD_TRACE_WQ(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 0, 0, 0);
-       }
-
-       pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE);
-
-       thread_deallocate(th);
-       return rv;
-}
-
-static int
-_bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, user_addr_t ulock_addr)
-{
-       thread_t th;
-       int rv = 0;
-
-       if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
-               return ESRCH;
-       }
-
-       int override_qos = pthread_priority_get_thread_qos(priority);
-
-       struct threadlist *tl = util_get_thread_threadlist_entry(th);
-       if (!tl) {
-               thread_deallocate(th);
-               return EPERM;
-       }
-
-       PTHREAD_TRACE_WQ(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);
-
-       rv = pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE,
-                       resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE, ulock_addr, kport);
-
-       thread_deallocate(th);
-       return rv;
-}
-
-int _bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused *p, user_addr_t __unused cmd,
-               mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
-{
-       return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, resource, USER_ADDR_NULL);
-}
-
-int
-_bsdthread_ctl_qos_override_dispatch(struct proc *p __unused, user_addr_t cmd __unused, mach_port_name_t kport, pthread_priority_t priority, user_addr_t ulock_addr, int __unused *retval)
-{
-       return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, USER_ADDR_NULL, ulock_addr);
-}
-
-int
-_bsdthread_ctl_qos_override_reset(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
-{
-       if (arg1 != 0 || arg2 != 0 || arg3 != 0) {
-               return EINVAL;
-       }
-
-       return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, 1 /* reset_all */, 0, 0, retval);
-}
-
-int
-_bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused *p, user_addr_t __unused cmd, int reset_all, user_addr_t resource, user_addr_t arg3, int __unused *retval)
-{
-       if ((reset_all && (resource != 0)) || arg3 != 0) {
-               return EINVAL;
-       }
-
-       thread_t th = current_thread();
-       struct uthread *uth = pthread_kern->get_bsdthread_info(th);
-       struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
-
-       if (!tl) {
-               return EPERM;
-       }
-
-       PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, 0, 0, 0, 0);
-
-       resource = reset_all ? THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD : resource;
-       pthread_kern->proc_usynch_thread_qos_reset_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
-
-       return 0;
-}
-
-static int
-_bsdthread_ctl_max_parallelism(struct proc __unused *p, user_addr_t __unused cmd,
-               int qos, unsigned long flags, int *retval)
-{
-       _Static_assert(QOS_PARALLELISM_COUNT_LOGICAL ==
-                       _PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL, "logical");
-       _Static_assert(QOS_PARALLELISM_REALTIME ==
-                       _PTHREAD_QOS_PARALLELISM_REALTIME, "realtime");
-
-       if (flags & ~(QOS_PARALLELISM_REALTIME | QOS_PARALLELISM_COUNT_LOGICAL)) {
-               return EINVAL;
-       }
-
-       if (flags & QOS_PARALLELISM_REALTIME) {
-               if (qos) {
-                       return EINVAL;
-               }
-       } else if (qos == THREAD_QOS_UNSPECIFIED || qos >= THREAD_QOS_LAST) {
-               return EINVAL;
-       }
-
-       *retval = pthread_kern->qos_max_parallelism(qos, flags);
-       return 0;
-}
-
-int
-_bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
-{
-       switch (cmd) {
-       case BSDTHREAD_CTL_SET_QOS:
-               return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
-       case BSDTHREAD_CTL_QOS_OVERRIDE_START:
-               return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
-       case BSDTHREAD_CTL_QOS_OVERRIDE_END:
-               return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
-       case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
-               return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval);
-       case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
-               return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
-       case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD:
-               return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
-       case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET:
-               return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, (int)arg1, arg2, arg3, retval);
-       case BSDTHREAD_CTL_SET_SELF:
-               return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval);
-       case BSDTHREAD_CTL_QOS_MAX_PARALLELISM:
-               return _bsdthread_ctl_max_parallelism(p, cmd, (int)arg1, (unsigned long)arg2, retval);
-       default:
-               return EINVAL;
-       }
-}
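+
+/*
+ * Illustrative sketch of how userspace reaches this dispatcher (the wrapper
+ * call shown is an assumption for illustration, not part of these sources):
+ *
+ *     // e.g. the userspace side of BSDTHREAD_CTL_QOS_MAX_PARALLELISM
+ *     int supported_threads = __bsdthread_ctl(
+ *                     BSDTHREAD_CTL_QOS_MAX_PARALLELISM,
+ *                     (uintptr_t)thread_qos, (uintptr_t)flags, 0);
+ */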
-
-#pragma mark - Workqueue Implementation
-
-#pragma mark wq_flags
-
-static inline uint32_t
-_wq_flags(struct workqueue *wq)
-{
-       return atomic_load_explicit(&wq->wq_flags, memory_order_relaxed);
-}
-
-static inline bool
-_wq_exiting(struct workqueue *wq)
-{
-       return _wq_flags(wq) & WQ_EXITING;
-}
-
-static inline uint32_t
-_wq_flags_or_orig(struct workqueue *wq, uint32_t v)
-{
-#if PTHREAD_INLINE_RMW_ATOMICS
-       uint32_t state;
-       do {
-               state = _wq_flags(wq);
-       } while (!OSCompareAndSwap(state, state | v, &wq->wq_flags));
-       return state;
-#else
-       return atomic_fetch_or_explicit(&wq->wq_flags, v, memory_order_relaxed);
-#endif
-}
-
-static inline uint32_t
-_wq_flags_and_orig(struct workqueue *wq, uint32_t v)
-{
-#if PTHREAD_INLINE_RMW_ATOMICS
-       uint32_t state;
-       do {
-               state = _wq_flags(wq);
-       } while (!OSCompareAndSwap(state, state & v, &wq->wq_flags));
-       return state;
-#else
-       return atomic_fetch_and_explicit(&wq->wq_flags, v, memory_order_relaxed);
-#endif
-}
-
-static inline bool
-WQ_TIMER_DELAYED_NEEDED(struct workqueue *wq)
-{
-       uint32_t oldflags, newflags;
-       do {
-               oldflags = _wq_flags(wq);
-               if (oldflags & (WQ_EXITING | WQ_ATIMER_DELAYED_RUNNING)) {
-                       return false;
-               }
-               newflags = oldflags | WQ_ATIMER_DELAYED_RUNNING;
-       } while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags));
-       return true;
-}
-
-static inline bool
-WQ_TIMER_IMMEDIATE_NEEDED(struct workqueue *wq)
-{
-       uint32_t oldflags, newflags;
-       do {
-               oldflags = _wq_flags(wq);
-               if (oldflags & (WQ_EXITING | WQ_ATIMER_IMMEDIATE_RUNNING)) {
-                       return false;
-               }
-               newflags = oldflags | WQ_ATIMER_IMMEDIATE_RUNNING;
-       } while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags));
-       return true;
-}
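+
+/*
+ * Note (illustrative, not from the original sources): both helpers above use
+ * the same lock-free claim pattern.  They loop on a compare-and-swap until
+ * either the RUNNING flag (or WQ_EXITING) is already set, meaning someone
+ * else owns the timer and we return false, or our CAS lands and we are now
+ * the one responsible for calling thread_call_enter*().
+ */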
-
-#pragma mark thread requests pacing
-
-static inline uint32_t
-_wq_pacing_shift_for_pri(int pri)
-{
-       return _wq_bucket_to_thread_qos(pri) - 1;
-}
-
-static inline int
-_wq_highest_paced_priority(struct workqueue *wq)
-{
-       uint8_t paced = wq->wq_paced;
-       int msb = paced ? 32 - __builtin_clz(paced) : 0; // fls(paced) == bit + 1
-       return WORKQUEUE_EVENT_MANAGER_BUCKET - msb;
-}
-
-static inline uint8_t
-_wq_pacing_bit_for_pri(int pri)
-{
-       return 1u << _wq_pacing_shift_for_pri(pri);
-}
-
-static inline bool
-_wq_should_pace_priority(struct workqueue *wq, int pri)
-{
-       return wq->wq_paced >= _wq_pacing_bit_for_pri(pri);
-}
-
-static inline void
-_wq_pacing_start(struct workqueue *wq, struct threadlist *tl)
-{
-       uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority);
-       assert((tl->th_flags & TH_LIST_PACING) == 0);
-       assert((wq->wq_paced & bit) == 0);
-       wq->wq_paced |= bit;
-       tl->th_flags |= TH_LIST_PACING;
-}
-
-static inline bool
-_wq_pacing_end(struct workqueue *wq, struct threadlist *tl)
-{
-       if (tl->th_flags & TH_LIST_PACING) {
-               uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority);
-               assert((wq->wq_paced & bit) != 0);
-               wq->wq_paced ^= bit;
-               tl->th_flags &= ~TH_LIST_PACING;
-               return wq->wq_paced < bit; // !_wq_should_pace_priority
-       }
-       return false;
-}
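+
+/*
+ * Worked example of the pacing bit math (illustrative, not from the
+ * original sources): wq_paced keeps one bit per QoS bucket, with higher QoS
+ * in higher bits.  "32 - __builtin_clz(paced)" is fls(paced), so e.g.:
+ *
+ *     paced = 0b00000101  ->  fls = 3
+ *     _wq_highest_paced_priority = WORKQUEUE_EVENT_MANAGER_BUCKET - 3
+ *
+ * and _wq_should_pace_priority() is a single unsigned compare, because any
+ * set bit at or above a priority's pacing bit makes wq_paced >= that bit.
+ */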
-
-#pragma mark thread requests
-
-static void
-_threadreq_init_alloced(struct threadreq *req, int priority, int flags)
-{
-       assert((flags & TR_FLAG_ONSTACK) == 0);
-       req->tr_state = TR_STATE_NEW;
-       req->tr_priority = priority;
-       req->tr_flags = flags;
-}
-
-static void
-_threadreq_init_stack(struct threadreq *req, int priority, int flags)
-{
-       req->tr_state = TR_STATE_NEW;
-       req->tr_priority = priority;
-       req->tr_flags = flags | TR_FLAG_ONSTACK;
-}
-
-static void
-_threadreq_copy_prepare(struct workqueue *wq)
-{
-again:
-       if (wq->wq_cached_threadreq) {
-               return;
-       }
-
-       workqueue_unlock(wq);
-       struct threadreq *req = zalloc(pthread_zone_threadreq);
-       workqueue_lock_spin(wq);
-
-       if (wq->wq_cached_threadreq) {
-               /*
-                * We lost the race and someone left behind an extra threadreq for us
-                * to use.  Throw away our request and retry.
-                */
-               workqueue_unlock(wq);
-               zfree(pthread_zone_threadreq, req);
-               workqueue_lock_spin(wq);
-               goto again;
-       } else {
-               wq->wq_cached_threadreq = req;
-       }
-
-       assert(wq->wq_cached_threadreq);
-}
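+
+/*
+ * Note (illustrative, not from the original sources): this is the classic
+ * drop-lock/allocate/retake/recheck pattern.  zalloc() may block, so the
+ * workqueue spinlock must be dropped around it, and the cache slot has to
+ * be re-tested afterwards because another thread may have refilled it in
+ * the window where the lock was not held.
+ */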
-
-static bool
-_threadreq_copy_prepare_noblock(struct workqueue *wq)
-{
-       if (wq->wq_cached_threadreq) {
-               return true;
-       }
-
-       wq->wq_cached_threadreq = zalloc_noblock(pthread_zone_threadreq);
-
-       return wq->wq_cached_threadreq != NULL;
-}
-
-static inline struct threadreq_head *
-_threadreq_list_for_req(struct workqueue *wq, const struct threadreq *req)
-{
-       if (req->tr_flags & TR_FLAG_OVERCOMMIT) {
-               return &wq->wq_overcommit_reqlist[req->tr_priority];
-       } else {
-               return &wq->wq_reqlist[req->tr_priority];
-       }
-}
-
-static void
-_threadreq_enqueue(struct workqueue *wq, struct threadreq *req)
-{
-       assert(req && req->tr_state == TR_STATE_NEW);
-       if (req->tr_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-               assert(wq->wq_event_manager_threadreq.tr_state != TR_STATE_WAITING);
-               memcpy(&wq->wq_event_manager_threadreq, req, sizeof(struct threadreq));
-               req = &wq->wq_event_manager_threadreq;
-               req->tr_flags &= ~(TR_FLAG_ONSTACK | TR_FLAG_NO_PACING);
-       } else {
-               if (req->tr_flags & TR_FLAG_ONSTACK) {
-                       assert(wq->wq_cached_threadreq);
-                       struct threadreq *newreq = wq->wq_cached_threadreq;
-                       wq->wq_cached_threadreq = NULL;
-
-                       memcpy(newreq, req, sizeof(struct threadreq));
-                       newreq->tr_flags &= ~(TR_FLAG_ONSTACK | TR_FLAG_NO_PACING);
-                       req->tr_state = TR_STATE_DEAD;
-                       req = newreq;
-               }
-               TAILQ_INSERT_TAIL(_threadreq_list_for_req(wq, req), req, tr_entry);
-       }
-       req->tr_state = TR_STATE_WAITING;
-       wq->wq_reqcount++;
-}
-
-static void
-_threadreq_dequeue(struct workqueue *wq, struct threadreq *req)
-{
-       if (req->tr_priority != WORKQUEUE_EVENT_MANAGER_BUCKET) {
-               struct threadreq_head *req_list = _threadreq_list_for_req(wq, req);
-#if DEBUG
-               struct threadreq *cursor = NULL;
-               TAILQ_FOREACH(cursor, req_list, tr_entry) {
-                       if (cursor == req) break;
-               }
-               assert(cursor == req);
-#endif
-               TAILQ_REMOVE(req_list, req, tr_entry);
-       }
-       wq->wq_reqcount--;
-}
-
-/*
- * Mark a thread request as complete.  At this point, it is treated as owned by
- * the submitting subsystem and you should assume it could be freed.
- *
- * Called with the workqueue lock held.
- */
-static int
-_threadreq_complete_and_unlock(proc_t p, struct workqueue *wq,
-               struct threadreq *req, struct threadlist *tl)
-{
-       struct threadreq *req_tofree = NULL;
-       bool sync = (req->tr_state == TR_STATE_NEW);
-       bool workloop = req->tr_flags & TR_FLAG_WORKLOOP;
-       bool onstack = req->tr_flags & TR_FLAG_ONSTACK;
-       bool kevent = req->tr_flags & TR_FLAG_KEVENT;
-       bool unbinding = tl->th_flags & TH_LIST_UNBINDING;
-       bool locked = true;
-       bool waking_parked_thread = (tl->th_flags & TH_LIST_BUSY);
-       int ret;
-
-       req->tr_state = TR_STATE_COMPLETE;
-
-       if (!workloop && !onstack && req != &wq->wq_event_manager_threadreq) {
-               if (wq->wq_cached_threadreq) {
-                       req_tofree = req;
-               } else {
-                       wq->wq_cached_threadreq = req;
-               }
-       }
-
-       if (tl->th_flags & TH_LIST_UNBINDING) {
-               tl->th_flags &= ~TH_LIST_UNBINDING;
-               assert((tl->th_flags & TH_LIST_KEVENT_BOUND));
-       } else if (workloop || kevent) {
-               assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
-               tl->th_flags |= TH_LIST_KEVENT_BOUND;
-       }
-
-       if (workloop) {
-               workqueue_unlock(wq);
-               ret = pthread_kern->workloop_fulfill_threadreq(wq->wq_proc, (void*)req,
-                               tl->th_thread, sync ? WORKLOOP_FULFILL_THREADREQ_SYNC : 0);
-               assert(ret == 0);
-               locked = false;
-       } else if (kevent) {
-               unsigned int kevent_flags = KEVENT_FLAG_WORKQ;
-               if (sync) {
-                       kevent_flags |= KEVENT_FLAG_SYNCHRONOUS_BIND;
-               }
-               if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-                       kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER;
-               }
-               workqueue_unlock(wq);
-               ret = kevent_qos_internal_bind(wq->wq_proc,
-                               class_index_get_thread_qos(tl->th_priority), tl->th_thread,
-                               kevent_flags);
-               if (ret != 0) {
-                       workqueue_lock_spin(wq);
-                       tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
-                       locked = true;
-               } else {
-                       locked = false;
-               }
-       }
-
-       /*
-        * Run Thread, Run!
-        */
-       PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 0, 0, 0, 0);
-       PTHREAD_TRACE_WQ_REQ(TRACE_wq_runitem | DBG_FUNC_START, wq, req, tl->th_priority,
-                       thread_tid(current_thread()), thread_tid(tl->th_thread));
-
-       if (waking_parked_thread) {
-               if (!locked) {
-                       workqueue_lock_spin(wq);
-               }
-               tl->th_flags &= ~(TH_LIST_BUSY);
-               if ((tl->th_flags & TH_LIST_REMOVING_VOUCHER) == 0) {
-                       /*
-                        * If the thread is in the process of removing its voucher, then it
-                        * isn't actually in the wait event yet and we don't need to wake
-                        * it up.  Save the trouble (and potential lock-ordering issues
-                        * (see 30617015)).
-                        */
-                       thread_wakeup_thread(tl, tl->th_thread);
-               }
-               workqueue_unlock(wq);
-
-               if (req_tofree) zfree(pthread_zone_threadreq, req_tofree);
-               return WQ_RUN_TR_THREAD_STARTED;
-       }
-
-       assert ((tl->th_flags & TH_LIST_PACING) == 0);
-       if (locked) {
-               workqueue_unlock(wq);
-       }
-       if (req_tofree) zfree(pthread_zone_threadreq, req_tofree);
-       if (unbinding) {
-               return WQ_RUN_TR_THREAD_STARTED;
-       }
-       _setup_wqthread(p, tl->th_thread, wq, tl, WQ_SETUP_CLEAR_VOUCHER);
-       pthread_kern->unix_syscall_return(EJUSTRETURN);
-       __builtin_unreachable();
-}
-
-/*
- * Mark a thread request as cancelled.  Has similar ownership semantics to the
- * complete call above.
- */
-static void
-_threadreq_cancel(struct workqueue *wq, struct threadreq *req)
-{
-       assert(req->tr_state == TR_STATE_WAITING);
-       req->tr_state = TR_STATE_DEAD;
-
-       assert((req->tr_flags & TR_FLAG_ONSTACK) == 0);
-       if (req->tr_flags & TR_FLAG_WORKLOOP) {
-               __assert_only int ret;
-               ret = pthread_kern->workloop_fulfill_threadreq(wq->wq_proc, (void*)req,
-                               THREAD_NULL, WORKLOOP_FULFILL_THREADREQ_CANCEL);
-               assert(ret == 0 || ret == ECANCELED);
-       } else if (req != &wq->wq_event_manager_threadreq) {
-               zfree(pthread_zone_threadreq, req);
-       }
-}
-
-#pragma mark workqueue lock
-
-static boolean_t
-workqueue_lock_spin_is_acquired_kdp(struct workqueue *wq)
-{
-       return kdp_lck_spin_is_acquired(&wq->wq_lock);
-}
-
-static void
-workqueue_lock_spin(struct workqueue *wq)
-{
-       assert(ml_get_interrupts_enabled() == TRUE);
-       lck_spin_lock(&wq->wq_lock);
-}
-
-static bool
-workqueue_lock_try(struct workqueue *wq)
-{
-       return lck_spin_try_lock(&wq->wq_lock);
-}
-
-static void
-workqueue_unlock(struct workqueue *wq)
-{
-       lck_spin_unlock(&wq->wq_lock);
-}
-
-#pragma mark workqueue add timer
-
-/**
- * Sets up the timer which will call out to workqueue_add_timer
- */
-static void
-workqueue_interval_timer_start(struct workqueue *wq)
-{
-       uint64_t deadline;
-
-       /*
-        * n.b. wq_timer_interval is reset to 0 in workqueue_add_timer if the
-        * ATIMER_RUNNING flag is not present.  The net effect here is that if
-        * a sequence of threads is required, we'll double the time before we
-        * give out the next one.
-        */
-       if (wq->wq_timer_interval == 0) {
-               wq->wq_timer_interval = wq_stalled_window_usecs;
-
-       } else {
-               wq->wq_timer_interval = wq->wq_timer_interval * 2;
-
-               if (wq->wq_timer_interval > wq_max_timer_interval_usecs) {
-                       wq->wq_timer_interval = wq_max_timer_interval_usecs;
-               }
-       }
-       clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);
-
-       PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount,
-                       _wq_flags(wq), wq->wq_timer_interval, 0);
-
-       thread_call_t call = wq->wq_atimer_delayed_call;
-       if (thread_call_enter1_delayed(call, call, deadline)) {
-               panic("delayed_call was already enqueued");
-       }
-}
-
-/**
- * Immediately trigger the workqueue_add_timer
- */
-static void
-workqueue_interval_timer_trigger(struct workqueue *wq)
-{
-       PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount,
-                       _wq_flags(wq), 0, 0);
-
-       thread_call_t call = wq->wq_atimer_immediate_call;
-       if (thread_call_enter1(call, call)) {
-               panic("immediate_call was already enqueued");
-       }
-}
-
-/**
- * returns whether lastblocked_tsp is within wq_stalled_window_usecs of cur_ts
- */
-static boolean_t
-wq_thread_is_busy(uint64_t cur_ts, _Atomic uint64_t *lastblocked_tsp)
-{
-       clock_sec_t     secs;
-       clock_usec_t    usecs;
-       uint64_t lastblocked_ts;
-       uint64_t elapsed;
-
-       lastblocked_ts = atomic_load_explicit(lastblocked_tsp, memory_order_relaxed);
-       if (lastblocked_ts >= cur_ts) {
-               /*
-                * Because the update of the timestamp when a thread blocks isn't
-                * serialized against us looking at it (we don't hold the workq
-                * lock), it's possible to see a timestamp that matches the current
-                * time, or even one that appears to be in the future relative to
-                * when we grabbed the current time.  Just treat this as a busy
-                * thread, since it must have just blocked.
-                */
-               return (TRUE);
-       }
-       elapsed = cur_ts - lastblocked_ts;
-
-       pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);
-
-       return (secs == 0 && usecs < wq_stalled_window_usecs);
-}
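+
+/*
+ * Worked example (illustrative; the window value is an assumption): with
+ * wq_stalled_window_usecs = 200, a thread whose last-blocked timestamp is
+ * 150us old is still "busy" (it may wake back up at any moment), while one
+ * that blocked 300us ago is considered stalled, so the add timer may spin
+ * up a replacement thread for its bucket.
+ */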
-
-/**
- * handler function for the timer
- */
-static void
-workqueue_add_timer(struct workqueue *wq, thread_call_t thread_call_self)
-{
-       proc_t p = wq->wq_proc;
-
-       workqueue_lock_spin(wq);
-
-       PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_START, wq,
-                       _wq_flags(wq), wq->wq_nthreads, wq->wq_thidlecount, 0);
-
-       /*
-        * There are two tricky issues here.
-        *
-        * First issue: we start the thread_call's that invoke this routine without
-        * the workqueue lock held.  The scheduler callback needs to trigger
-        * reevaluation of the number of running threads but shouldn't take that
-        * lock, so we can't use it to synchronize state around the thread_call.
-        * As a result, it might re-enter the thread_call while this routine is
-        * already running.  This could cause it to fire a second time and we'll
-        * have two add_timers running at once.  Obviously, we don't want that to
-        * keep stacking, so we need to keep it at two timers.
-        *
-        * Solution: use wq_flags (accessed via atomic CAS) to synchronize the
-        * enqueue of the thread_call itself.  When a thread needs to trigger the
-        * add_timer, it checks for ATIMER_DELAYED_RUNNING and, when not set, sets
-        * the flag then does a thread_call_enter.  We'll then remove that flag
-        * only once we've got the lock and it's safe for the thread_call to be
-        * entered again.
-        *
-        * Second issue: we need to make sure that the two timers don't execute this
-        * routine concurrently.  We can't use the workqueue lock for this because
-        * we'll need to drop it during our execution.
-        *
-        * Solution: use WQL_ATIMER_BUSY as a condition variable to indicate that
-        * we are currently executing the routine and the next thread should wait.
-        *
-        * After all that, we arrive at the following four possible states:
-        * !WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY       no pending timer, no active timer
-        * !WQ_ATIMER_DELAYED_RUNNING &&  WQL_ATIMER_BUSY       no pending timer,  1 active timer
-        *  WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY        1 pending timer, no active timer
-        *  WQ_ATIMER_DELAYED_RUNNING &&  WQL_ATIMER_BUSY        1 pending timer,  1 active timer
-        *
-        * A further complication: sometimes we need to trigger this function to run
-        * without delay.  Because we aren't under a lock between setting
-        * WQ_ATIMER_DELAYED_RUNNING and calling thread_call_enter, we can't simply
-        * re-enter the thread call: if thread_call_enter() returned false, we
-        * wouldn't be able to distinguish the case where the thread_call had
-        * already fired from the case where it hadn't yet been entered by the
-        * other thread.  So, we use a separate thread_call for immediate
-        * invocations, and a separate RUNNING flag, WQ_ATIMER_IMMEDIATE_RUNNING.
-        */
-
-       while (wq->wq_lflags & WQL_ATIMER_BUSY) {
-               wq->wq_lflags |= WQL_ATIMER_WAITING;
-
-               assert_wait((caddr_t)wq, (THREAD_UNINT));
-               workqueue_unlock(wq);
-
-               thread_block(THREAD_CONTINUE_NULL);
-
-               workqueue_lock_spin(wq);
-       }
-       /*
-        * Prevent _workqueue_mark_exiting() from going away
-        */
-       wq->wq_lflags |= WQL_ATIMER_BUSY;
-
-       /*
-        * Decide which timer we are and remove the RUNNING flag.
-        */
-       if (thread_call_self == wq->wq_atimer_delayed_call) {
-               uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING);
-               if ((wq_flags & WQ_ATIMER_DELAYED_RUNNING) == 0) {
-                       panic("workqueue_add_timer(delayed) w/o WQ_ATIMER_DELAYED_RUNNING");
-               }
-       } else if (thread_call_self == wq->wq_atimer_immediate_call) {
-               uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING);
-               if ((wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) == 0) {
-                       panic("workqueue_add_timer(immediate) w/o WQ_ATIMER_IMMEDIATE_RUNNING");
-               }
-       } else {
-               panic("workqueue_add_timer can't figure out which timer it is");
-       }
-
-       int ret = WQ_RUN_TR_THREAD_STARTED;
-       while (ret == WQ_RUN_TR_THREAD_STARTED && wq->wq_reqcount) {
-               ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
-
-               workqueue_lock_spin(wq);
-       }
-       _threadreq_copy_prepare(wq);
-
-       /*
-        * If we called WQ_TIMER_NEEDED above, then this flag will be set if that
-        * call marked the timer running.  If so, we let the timer interval grow.
-        * Otherwise, we reset it back to 0.
-        */
-       uint32_t wq_flags = _wq_flags(wq);
-       if (!(wq_flags & WQ_ATIMER_DELAYED_RUNNING)) {
-               wq->wq_timer_interval = 0;
-       }
-
-       wq->wq_lflags &= ~WQL_ATIMER_BUSY;
-
-       if ((wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
-               /*
-                * wakeup the thread hung up in _workqueue_mark_exiting or
-                * workqueue_add_timer waiting for this timer to finish getting out of
-                * the way
-                */
-               wq->wq_lflags &= ~WQL_ATIMER_WAITING;
-               wakeup(wq);
-       }
-
-       PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_END, wq, 0, wq->wq_nthreads, wq->wq_thidlecount, 0);
-
-       workqueue_unlock(wq);
-}
-
-#pragma mark thread state tracking
-
-// called by spinlock code when trying to yield to lock owner
-void
-_workqueue_thread_yielded(void)
-{
-}
-
-static void
-workqueue_callback(int type, thread_t thread)
-{
-       struct uthread *uth = pthread_kern->get_bsdthread_info(thread);
-       struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
-       struct workqueue *wq = tl->th_workq;
-       uint32_t old_count, req_qos, qos = tl->th_priority;
-       wq_thactive_t old_thactive;
-
-       switch (type) {
-       case SCHED_CALL_BLOCK: {
-               bool start_timer = false;
-
-               old_thactive = _wq_thactive_dec(wq, tl->th_priority);
-               req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive);
-               old_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive,
-                               qos, NULL, NULL);
-
-               if (old_count == wq_max_concurrency[tl->th_priority]) {
-                       /*
-                        * The number of active threads at this priority has fallen below
-                        * the maximum number of concurrent threads that are allowed to run.
-                        *
-                        * If we collide with another thread trying to update the
-                        * last_blocked (really unlikely, since another thread would have to
-                        * get scheduled and then block after we start down this path), it's
-                        * not a problem.  Either timestamp is adequate, so there's no need
-                        * to retry.
-                        */
-                       atomic_store_explicit(&wq->wq_lastblocked_ts[qos],
-                                       mach_absolute_time(), memory_order_relaxed);
-               }
-
-               if (req_qos == WORKQUEUE_EVENT_MANAGER_BUCKET || qos > req_qos) {
-                       /*
-                        * The blocking thread is at a lower QoS than the highest currently
-                        * pending constrained request, nothing has to be redriven
-                        */
-               } else {
-                       uint32_t max_busycount, old_req_count;
-                       old_req_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive,
-                                       req_qos, NULL, &max_busycount);
-                       /*
-                        * If it is possible that may_start_constrained_thread had refused
-                        * admission due to being over the max concurrency, we may need to
-                        * spin up a new thread.
-                        *
-                        * We take into account the maximum number of busy threads
-                        * that can affect may_start_constrained_thread as looking at the
-                        * actual number may_start_constrained_thread will see is racy.
-                        *
-                        * IOW at NCPU = 4, for IN (req_qos = 1), if the old req count is
-                        * between NCPU (4) and NCPU - 2 (2) we need to redrive.
-                        */
-                       if (wq_max_concurrency[req_qos] <= old_req_count + max_busycount &&
-                                       old_req_count <= wq_max_concurrency[req_qos]) {
-                               if (WQ_TIMER_DELAYED_NEEDED(wq)) {
-                                       start_timer = true;
-                                       workqueue_interval_timer_start(wq);
-                               }
-                       }
-               }
-
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_START, wq,
-                               old_count - 1, qos | (req_qos << 8),
-                               wq->wq_reqcount << 1 | start_timer, 0);
-               break;
-       }
-       case SCHED_CALL_UNBLOCK: {
-               /*
-                * We cannot take the workqueue lock here: an UNBLOCK can occur
-                * from a timer event, which runs in interrupt context, so if this
-                * processor already held the workqueue lock we would deadlock.
-                * The thread lock for the thread being UNBLOCKED is also held.
-                */
-               old_thactive = _wq_thactive_inc(wq, qos);
-               if (pthread_debug_tracing) {
-                       req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive);
-                       old_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive,
-                                       qos, NULL, NULL);
-                       PTHREAD_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_END, wq,
-                                       old_count + 1, qos | (req_qos << 8),
-                                       wq->wq_threads_scheduled, 0);
-               }
-               break;
-       }
-       }
-}
-
-sched_call_t
-_workqueue_get_sched_callback(void)
-{
-       return workqueue_callback;
-}
-
-#pragma mark thread addition/removal
-
-static mach_vm_size_t
-_workqueue_allocsize(struct workqueue *wq)
-{
-       proc_t p = wq->wq_proc;
-       mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
-       mach_vm_size_t pthread_size =
-               vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map));
-       return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
-}
-
-/**
- * pop goes the thread
- *
- * If fromexit is set, the call is from workqueue_exit(),
- * so some cleanups are to be avoided.
- */
-static void
-workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use)
-{
-       struct uthread * uth;
-       struct workqueue * wq = tl->th_workq;
-
-       if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){
-               TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
-       } else {
-               TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
-       }
-
-       if (fromexit == 0) {
-               assert(wq->wq_nthreads && wq->wq_thidlecount);
-               wq->wq_nthreads--;
-               wq->wq_thidlecount--;
-       }
-
-       /*
-        * Clear the threadlist pointer in the uthread so that a blocked
-        * thread, on being woken up for termination, will not access the
-        * threadlist after it has been freed.
-        */
-       pthread_kern->thread_sched_call(tl->th_thread, NULL);
-
-       uth = pthread_kern->get_bsdthread_info(tl->th_thread);
-       if (uth != (struct uthread *)0) {
-               pthread_kern->uthread_set_threadlist(uth, NULL);
-       }
-       if (fromexit == 0) {
-               /* during exit the lock is not held */
-               workqueue_unlock(wq);
-       }
-
-       if ( (tl->th_flags & TH_LIST_NEW) || first_use ) {
-               /*
-                * The thread was created but never used, so we need to clean up
-                * the stack and port ourselves, since we're not going to spin up
-                * through the normal exit path triggered from Libc.
-                */
-               if (fromexit == 0) {
-                       /* vm map is already deallocated when this is called from exit */
-                       (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, _workqueue_allocsize(wq));
-               }
-               (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport);
-       }
-       /*
-        * drop our ref on the thread
-        */
-       thread_deallocate(tl->th_thread);
-
-       zfree(pthread_zone_threadlist, tl);
-}
-
-
-/**
- * Try to add a new workqueue thread.
- *
- * - called with workq lock held
- * - dropped and retaken around thread creation
- * - return with workq lock held
- */
-static bool
-workqueue_addnewthread(proc_t p, struct workqueue *wq)
-{
-       kern_return_t kret;
-
-       wq->wq_nthreads++;
-
-       workqueue_unlock(wq);
-
-       struct threadlist *tl = zalloc(pthread_zone_threadlist);
-       bzero(tl, sizeof(struct threadlist));
-
-       thread_t th;
-       kret = pthread_kern->thread_create_workq_waiting(wq->wq_task, wq_unpark_continue, tl, &th);
-       if (kret != KERN_SUCCESS) {
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 0, 0, 0);
-               goto fail_free;
-       }
-
-       mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
-
-       mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
-       mach_vm_size_t pthread_size =
-               vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map));
-       mach_vm_size_t th_allocsize = guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
-
-       kret = mach_vm_map(wq->wq_map, &stackaddr,
-                       th_allocsize, page_size-1,
-                       VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL,
-                       0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
-                       VM_INHERIT_DEFAULT);
-
-       if (kret != KERN_SUCCESS) {
-               kret = mach_vm_allocate(wq->wq_map,
-                               &stackaddr, th_allocsize,
-                               VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
-       }
-
-       if (kret != KERN_SUCCESS) {
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 1, 0, 0);
-               goto fail_terminate;
-       }
-
-       /*
-        * The guard page is at the lowest address
-        * The stack base is the highest address
-        */
-       kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE);
-       if (kret != KERN_SUCCESS) {
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 2, 0, 0);
-               goto fail_vm_deallocate;
-       }
-
-
-       pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD | THREAD_TAG_WORKQUEUE);
-       pthread_kern->thread_static_param(th, TRUE);
-
-       /*
-        * convert_thread_to_port() consumes a reference
-        */
-       thread_reference(th);
-       void *sright = (void *)pthread_kern->convert_thread_to_port(th);
-       tl->th_thport = pthread_kern->ipc_port_copyout_send(sright,
-                       pthread_kern->task_get_ipcspace(wq->wq_task));
-
-       tl->th_flags = TH_LIST_INITED | TH_LIST_NEW;
-       tl->th_thread = th;
-       tl->th_workq = wq;
-       tl->th_stackaddr = stackaddr;
-       tl->th_priority = WORKQUEUE_NUM_BUCKETS;
-
-       struct uthread *uth;
-       uth = pthread_kern->get_bsdthread_info(tl->th_thread);
-
-       workqueue_lock_spin(wq);
-
-       void *current_tl = pthread_kern->uthread_get_threadlist(uth);
-       if (current_tl == NULL) {
-               pthread_kern->uthread_set_threadlist(uth, tl);
-               TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
-               wq->wq_thidlecount++;
-       } else if (current_tl == WQ_THREADLIST_EXITING_POISON) {
-               /*
-                * Failed thread creation race: The thread already woke up and has exited.
-                */
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 3, 0, 0);
-               goto fail_unlock;
-       } else {
-               panic("Unexpected initial threadlist value");
-       }
-
-       PTHREAD_TRACE_WQ(TRACE_wq_thread_create | DBG_FUNC_NONE, wq, 0, 0, 0, 0);
-
-       return (TRUE);
-
-fail_unlock:
-       workqueue_unlock(wq);
-       (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task),
-                       tl->th_thport);
-
-fail_vm_deallocate:
-       (void) mach_vm_deallocate(wq->wq_map, stackaddr, th_allocsize);
-
-fail_terminate:
-       if (pthread_kern->thread_will_park_or_terminate) {
-               pthread_kern->thread_will_park_or_terminate(th);
-       }
-       (void)thread_terminate(th);
-       thread_deallocate(th);
-
-fail_free:
-       zfree(pthread_zone_threadlist, tl);
-
-       workqueue_lock_spin(wq);
-       wq->wq_nthreads--;
-
-       return (FALSE);
-}
-
-/**
- * Setup per-process state for the workqueue.
- */
-int
-_workq_open(struct proc *p, __unused int32_t *retval)
-{
-       struct workqueue * wq;
-       char * ptr;
-       uint32_t num_cpus;
-       int error = 0;
-
-       if (pthread_kern->proc_get_register(p) == 0) {
-               return EINVAL;
-       }
-
-       num_cpus = pthread_kern->ml_get_max_cpus();
-
-       if (wq_init_constrained_limit) {
-               uint32_t limit;
-               /*
-                * Set up the limit for the constrained pool.  This is a virtual
-                * pool in the sense that we don't maintain it on separate idle
-                * and run lists.
-                */
-               limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;
-
-               if (limit > wq_max_constrained_threads)
-                       wq_max_constrained_threads = limit;
-
-               wq_init_constrained_limit = 0;
-
-               if (wq_max_threads > WQ_THACTIVE_BUCKET_HALF) {
-                       wq_max_threads = WQ_THACTIVE_BUCKET_HALF;
-               }
-               if (wq_max_threads > pthread_kern->config_thread_max - 20) {
-                       wq_max_threads = pthread_kern->config_thread_max - 20;
-               }
-       }
-
-       if (pthread_kern->proc_get_wqptr(p) == NULL) {
-               if (pthread_kern->proc_init_wqptr_or_wait(p) == FALSE) {
-                       assert(pthread_kern->proc_get_wqptr(p) != NULL);
-                       goto out;
-               }
-
-               ptr = (char *)zalloc(pthread_zone_workqueue);
-               bzero(ptr, sizeof(struct workqueue));
-
-               wq = (struct workqueue *)ptr;
-               wq->wq_proc = p;
-               wq->wq_task = current_task();
-               wq->wq_map  = pthread_kern->current_map();
-
-               // Start the event manager at the priority hinted at by the policy engine
-               int mgr_priority_hint = pthread_kern->task_get_default_manager_qos(current_task());
-               wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(mgr_priority_hint) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-
-               TAILQ_INIT(&wq->wq_thrunlist);
-               TAILQ_INIT(&wq->wq_thidlelist);
-               for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
-                       TAILQ_INIT(&wq->wq_overcommit_reqlist[i]);
-                       TAILQ_INIT(&wq->wq_reqlist[i]);
-               }
-
-               wq->wq_atimer_delayed_call =
-                               thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer,
-                                               (thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL);
-               wq->wq_atimer_immediate_call =
-                               thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer,
-                                               (thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL);
-
-               lck_spin_init(&wq->wq_lock, pthread_lck_grp, pthread_lck_attr);
-
-               wq->wq_cached_threadreq = zalloc(pthread_zone_threadreq);
-               *(wq_thactive_t *)&wq->wq_thactive =
-                               (wq_thactive_t)WQ_THACTIVE_NO_PENDING_REQUEST <<
-                               WQ_THACTIVE_QOS_SHIFT;
-
-               pthread_kern->proc_set_wqptr(p, wq);
-
-       }
-out:
-
-       return(error);
-}
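
/*
 * A minimal sketch of the one-time limit setup in _workq_open() above,
 * with simplified parameter names: the constrained pool is sized from
 * the CPU count, and wq_max_threads is clamped both by the thactive
 * bucket encoding and by the global kernel thread limit.
 */
#include <stdint.h>

static uint32_t
wq_constrained_limit(uint32_t num_cpus, uint32_t constrained_factor,
                uint32_t current_limit)
{
        uint32_t limit = num_cpus * constrained_factor;
        return (limit > current_limit) ? limit : current_limit;
}

static uint32_t
wq_clamp_max_threads(uint32_t max_threads, uint32_t bucket_half,
                uint32_t config_thread_max)
{
        if (max_threads > bucket_half)
                max_threads = bucket_half;
        if (max_threads > config_thread_max - 20)
                max_threads = config_thread_max - 20;
        return max_threads;
}
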
-
-/*
- * Routine:    workqueue_mark_exiting
- *
- * Function:   Mark the work queue such that new threads will not be added to the
- *             work queue after we return.
- *
- * Conditions: Called against the current process.
- */
-void
-_workqueue_mark_exiting(struct proc *p)
-{
-       struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
-       if (!wq) return;
-
-       PTHREAD_TRACE_WQ(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
-
-       workqueue_lock_spin(wq);
-
-       /*
-        * We arm the add timer without holding the workqueue lock, so we need
-        * to synchronize with any running or soon-to-be-running timers.
-        *
-        * Threads that intend to arm the timer atomically OR
-        * WQ_ATIMER_{DELAYED,IMMEDIATE}_RUNNING into the wq_flags, only if
-        * WQ_EXITING is not present.  So, once we have set WQ_EXITING, we can
-        * be sure that no new RUNNING flags will be set, but still need to
-        * wait for the already running timers to complete.
-        *
-        * We always hold the workq lock when dropping WQ_ATIMER_RUNNING, so
-        * the check-and-sleep-until-clear sequence below is protected.
-        */
-       uint64_t wq_flags = _wq_flags_or_orig(wq, WQ_EXITING);
-
-       if (wq_flags & WQ_ATIMER_DELAYED_RUNNING) {
-               if (thread_call_cancel(wq->wq_atimer_delayed_call) == TRUE) {
-                       wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING);
-               }
-       }
-       if (wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) {
-               if (thread_call_cancel(wq->wq_atimer_immediate_call) == TRUE) {
-                       wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING);
-               }
-       }
-       while ((_wq_flags(wq) & (WQ_ATIMER_DELAYED_RUNNING | WQ_ATIMER_IMMEDIATE_RUNNING)) ||
-                       (wq->wq_lflags & WQL_ATIMER_BUSY)) {
-               assert_wait((caddr_t)wq, (THREAD_UNINT));
-               workqueue_unlock(wq);
-
-               thread_block(THREAD_CONTINUE_NULL);
-
-               workqueue_lock_spin(wq);
-       }
-
-       /*
-        * Save off pending requests; we'll complete/free them below after unlocking.
-        */
-       TAILQ_HEAD(, threadreq) local_list = TAILQ_HEAD_INITIALIZER(local_list);
-
-       for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
-               TAILQ_CONCAT(&local_list, &wq->wq_overcommit_reqlist[i], tr_entry);
-               TAILQ_CONCAT(&local_list, &wq->wq_reqlist[i], tr_entry);
-       }
-
-       /*
-        * XXX: Can't deferred-cancel the event manager request, so just smash it.
-        */
-       assert((wq->wq_event_manager_threadreq.tr_flags & TR_FLAG_WORKLOOP) == 0);
-       wq->wq_event_manager_threadreq.tr_state = TR_STATE_DEAD;
-
-       workqueue_unlock(wq);
-
-       struct threadreq *tr, *tr_temp;
-       TAILQ_FOREACH_SAFE(tr, &local_list, tr_entry, tr_temp) {
-               _threadreq_cancel(wq, tr);
-       }
-       PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
-}
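
/*
 * A minimal sketch of the WQ_EXITING handshake used above, written with
 * C11 atomics and simplified flag names: arming threads only OR in a
 * RUNNING flag while EXITING is clear, so once EXITING is set no new
 * RUNNING flag can appear and teardown only waits for existing timers.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define WQF_EXITING        0x1u
#define WQF_TIMER_RUNNING  0x2u

static bool
wqf_try_arm_timer(_Atomic uint32_t *flags)
{
        uint32_t old = atomic_load(flags);
        do {
                if (old & WQF_EXITING)
                        return false;   /* too late: the workqueue is exiting */
        } while (!atomic_compare_exchange_weak(flags, &old,
                        old | WQF_TIMER_RUNNING));
        return true;
}

static uint32_t
wqf_mark_exiting(_Atomic uint32_t *flags)
{
        /* returns the pre-OR flags, like _wq_flags_or_orig() above */
        return atomic_fetch_or(flags, WQF_EXITING);
}
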
-
-/*
- * Routine:    workqueue_exit
- *
- * Function:   clean up the work queue structure(s) now that there are no threads
- *             left running inside the work queue (except possibly current_thread).
- *
- * Conditions: Called by the last thread in the process.
- *             Called against current process.
- */
-void
-_workqueue_exit(struct proc *p)
-{
-       struct workqueue  * wq;
-       struct threadlist  * tl, *tlist;
-       struct uthread  *uth;
-
-       wq = pthread_kern->proc_get_wqptr(p);
-       if (wq != NULL) {
-
-               PTHREAD_TRACE_WQ(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
-
-               pthread_kern->proc_set_wqptr(p, NULL);
-
-               /*
-                * Clean up workqueue data structures for threads that exited and
-                * didn't get a chance to clean up after themselves.
-                */
-               TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
-                       assert((tl->th_flags & TH_LIST_RUNNING) != 0);
-
-                       pthread_kern->thread_sched_call(tl->th_thread, NULL);
-
-                       uth = pthread_kern->get_bsdthread_info(tl->th_thread);
-                       if (uth != (struct uthread *)0) {
-                               pthread_kern->uthread_set_threadlist(uth, NULL);
-                       }
-                       TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
-
-                       /*
-                        * drop our last ref on the thread
-                        */
-                       thread_deallocate(tl->th_thread);
-
-                       zfree(pthread_zone_threadlist, tl);
-               }
-               TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
-                       assert((tl->th_flags & TH_LIST_RUNNING) == 0);
-                       assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
-                       workqueue_removethread(tl, true, false);
-               }
-               TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlemgrlist, th_entry, tlist) {
-                       assert((tl->th_flags & TH_LIST_RUNNING) == 0);
-                       assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
-                       workqueue_removethread(tl, true, false);
-               }
-               if (wq->wq_cached_threadreq) {
-                       zfree(pthread_zone_threadreq, wq->wq_cached_threadreq);
-               }
-               thread_call_free(wq->wq_atimer_delayed_call);
-               thread_call_free(wq->wq_atimer_immediate_call);
-               lck_spin_destroy(&wq->wq_lock, pthread_lck_grp);
-
-               for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
-                       assert(TAILQ_EMPTY(&wq->wq_overcommit_reqlist[i]));
-                       assert(TAILQ_EMPTY(&wq->wq_reqlist[i]));
-               }
-
-               zfree(pthread_zone_workqueue, wq);
-
-               PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
-       }
-}
-
-
-#pragma mark workqueue thread manipulation
-
-
-/**
- * Entry point for libdispatch to ask for threads
- */
-static int
-wqops_queue_reqthreads(struct proc *p, int reqcount,
-               pthread_priority_t priority)
-{
-       bool overcommit = _pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
-       bool event_manager = _pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-       int class = event_manager ? WORKQUEUE_EVENT_MANAGER_BUCKET :
-                       pthread_priority_get_class_index(priority);
-
-       if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS) ||
-                       (overcommit && event_manager)) {
-               return EINVAL;
-       }
-
-       struct workqueue *wq;
-       if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
-               return EINVAL;
-       }
-
-       workqueue_lock_spin(wq);
-       _threadreq_copy_prepare(wq);
-
-       PTHREAD_TRACE_WQ(TRACE_wq_wqops_reqthreads | DBG_FUNC_NONE, wq, reqcount, priority, 0, 0);
-
-       int tr_flags = 0;
-       if (overcommit) tr_flags |= TR_FLAG_OVERCOMMIT;
-       if (reqcount > 1) {
-               /*
-                * when libdispatch asks for more than one thread, it wants to achieve
-                * parallelism. Pacing would be detrimental to that goal, so treat
-                * these requests specially and skip the pacing admission check
-                */
-               tr_flags |= TR_FLAG_NO_PACING;
-       }
-
-       while (reqcount-- && !_wq_exiting(wq)) {
-               struct threadreq req;
-               _threadreq_init_stack(&req, class, tr_flags);
-
-               workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, true);
-
-               workqueue_lock_spin(wq); /* reacquire */
-               _threadreq_copy_prepare(wq);
-       }
-
-       workqueue_unlock(wq);
-
-       return 0;
-}
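
/*
 * A minimal sketch of the request-flag derivation in
 * wqops_queue_reqthreads() above (the *_SKETCH flag values are
 * hypothetical): overcommit comes from the priority word, and batched
 * requests skip pacing because libdispatch is explicitly asking for
 * parallelism.
 */
#include <stdbool.h>

#define TR_FLAG_OVERCOMMIT_SKETCH 0x1
#define TR_FLAG_NO_PACING_SKETCH  0x2

static int
wq_tr_flags_for_reqthreads(int reqcount, bool overcommit)
{
        int tr_flags = 0;
        if (overcommit)
                tr_flags |= TR_FLAG_OVERCOMMIT_SKETCH;
        if (reqcount > 1)
                tr_flags |= TR_FLAG_NO_PACING_SKETCH;
        return tr_flags;
}
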
-
-/*
- * Used by the kevent system to request threads.
- *
- * Currently count is ignored and we always return one thread per invocation.
- */
-static thread_t
-_workq_kevent_reqthreads(struct proc *p, pthread_priority_t priority,
-               bool no_emergency)
-{
-       int wq_run_tr = WQ_RUN_TR_THROTTLED;
-       bool emergency_thread = false;
-       struct threadreq req;
-
-
-       struct workqueue *wq;
-       if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
-               return THREAD_NULL;
-       }
-
-       int class = pthread_priority_get_class_index(priority);
-
-       workqueue_lock_spin(wq);
-       bool has_threadreq = _threadreq_copy_prepare_noblock(wq);
-
-       PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, NULL, priority, 0, 0);
-
-       /*
-        * Skip straight to event manager if that's what was requested
-        */
-       if ((_pthread_priority_get_qos_newest(priority) == QOS_CLASS_UNSPECIFIED) ||
-                       (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)){
-               goto event_manager;
-       }
-
-       bool will_pace = _wq_should_pace_priority(wq, class);
-       if ((wq->wq_thidlecount == 0 || will_pace) && has_threadreq == false) {
-               /*
-                * We'll need to persist the request and can't, so return the emergency
-                * thread instead, which has a persistent request object.
-                */
-               emergency_thread = true;
-               goto event_manager;
-       }
-
-       /*
-        * Handle overcommit requests
-        */
-       if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){
-               _threadreq_init_stack(&req, class, TR_FLAG_KEVENT | TR_FLAG_OVERCOMMIT);
-               wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false);
-               goto done;
-       }
-
-       /*
-        * Handle constrained requests
-        */
-       boolean_t may_start = may_start_constrained_thread(wq, class, NULL, false);
-       if (may_start || no_emergency) {
-               _threadreq_init_stack(&req, class, TR_FLAG_KEVENT);
-               wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false);
-               goto done;
-       } else {
-               emergency_thread = true;
-       }
-
-
-event_manager:
-       _threadreq_init_stack(&req, WORKQUEUE_EVENT_MANAGER_BUCKET, TR_FLAG_KEVENT);
-       wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false);
-
-done:
-       if (wq_run_tr == WQ_RUN_TR_THREAD_NEEDED && WQ_TIMER_IMMEDIATE_NEEDED(wq)) {
-               workqueue_interval_timer_trigger(wq);
-       }
-       return emergency_thread ? (void*)-1 : 0;
-}
-
-thread_t
-_workq_reqthreads(struct proc *p, __assert_only int requests_count,
-               workq_reqthreads_req_t request)
-{
-       assert(requests_count == 1);
-
-       pthread_priority_t priority = request->priority;
-       bool no_emergency = request->count & WORKQ_REQTHREADS_NOEMERGENCY;
-
-       return _workq_kevent_reqthreads(p, priority, no_emergency);
-}
-
-
-int
-workq_kern_threadreq(struct proc *p, workq_threadreq_t _req,
-               enum workq_threadreq_type type, unsigned long priority, int flags)
-{
-       struct workqueue *wq;
-       int ret;
-
-       if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
-               return EINVAL;
-       }
-
-       switch (type) {
-       case WORKQ_THREADREQ_KEVENT: {
-               bool no_emergency = flags & WORKQ_THREADREQ_FLAG_NOEMERGENCY;
-               (void)_workq_kevent_reqthreads(p, priority, no_emergency);
-               return 0;
-       }
-       case WORKQ_THREADREQ_WORKLOOP:
-       case WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL: {
-               struct threadreq *req = (struct threadreq *)_req;
-               int req_class = pthread_priority_get_class_index(priority);
-               int req_flags = TR_FLAG_WORKLOOP;
-               if ((_pthread_priority_get_flags(priority) &
-                               _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){
-                       req_flags |= TR_FLAG_OVERCOMMIT;
-               }
-
-               thread_t thread = current_thread();
-               struct threadlist *tl = util_get_thread_threadlist_entry(thread);
-
-               if (tl && tl != WQ_THREADLIST_EXITING_POISON &&
-                               (tl->th_flags & TH_LIST_UNBINDING)) {
-                       /*
-                        * we're called back synchronously from the context of
-                        * kevent_qos_internal_unbind from within wqops_thread_return(),
-                        * so we can try to match up this thread with this request!
-                        */
-               } else {
-                       tl = NULL;
-               }
-
-               _threadreq_init_alloced(req, req_class, req_flags);
-               workqueue_lock_spin(wq);
-               PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, priority, 1, 0);
-               ret = workqueue_run_threadreq_and_unlock(p, wq, tl, req, false);
-               if (ret == WQ_RUN_TR_EXITING) {
-                       return ECANCELED;
-               }
-               if (ret == WQ_RUN_TR_THREAD_NEEDED) {
-                       if (type == WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL) {
-                               return EAGAIN;
-                       }
-                       if (WQ_TIMER_IMMEDIATE_NEEDED(wq)) {
-                               workqueue_interval_timer_trigger(wq);
-                       }
-               }
-               return 0;
-       }
-       case WORKQ_THREADREQ_REDRIVE:
-               PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, 0, 0, 4, 0);
-               workqueue_lock_spin(wq);
-               ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
-               if (ret == WQ_RUN_TR_EXITING) {
-                       return ECANCELED;
-               }
-               return 0;
-       default:
-               return ENOTSUP;
-       }
-}
-
-int
-workq_kern_threadreq_modify(struct proc *p, workq_threadreq_t _req,
-               enum workq_threadreq_op operation, unsigned long arg1,
-               unsigned long __unused arg2)
-{
-       struct threadreq *req = (struct threadreq *)_req;
-       struct workqueue *wq;
-       int priclass, ret = 0, wq_tr_rc = WQ_RUN_TR_THROTTLED;
-
-       if (req == NULL || (wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
-               return EINVAL;
-       }
-
-       workqueue_lock_spin(wq);
-
-       if (_wq_exiting(wq)) {
-               ret = ECANCELED;
-               goto out_unlock;
-       }
-
-       /*
-        * Find/validate the referenced request structure
-        */
-       if (req->tr_state != TR_STATE_WAITING) {
-               ret = EINVAL;
-               goto out_unlock;
-       }
-       assert(req->tr_priority < WORKQUEUE_EVENT_MANAGER_BUCKET);
-       assert(req->tr_flags & TR_FLAG_WORKLOOP);
-
-       switch (operation) {
-       case WORKQ_THREADREQ_CHANGE_PRI:
-       case WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL:
-               priclass = pthread_priority_get_class_index(arg1);
-               PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, arg1, 2, 0);
-               if (req->tr_priority == priclass) {
-                       goto out_unlock;
-               }
-               _threadreq_dequeue(wq, req);
-               req->tr_priority = priclass;
-               req->tr_state = TR_STATE_NEW; // what was old is new again
-               wq_tr_rc = workqueue_run_threadreq_and_unlock(p, wq, NULL, req, false);
-               goto out;
-
-       case WORKQ_THREADREQ_CANCEL:
-               PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, 0, 3, 0);
-               _threadreq_dequeue(wq, req);
-               req->tr_state = TR_STATE_DEAD;
-               break;
-
-       default:
-               ret = ENOTSUP;
-               break;
-       }
-
-out_unlock:
-       workqueue_unlock(wq);
-out:
-       if (wq_tr_rc == WQ_RUN_TR_THREAD_NEEDED) {
-               if (operation == WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL) {
-                       ret = EAGAIN;
-               } else if (WQ_TIMER_IMMEDIATE_NEEDED(wq)) {
-                       workqueue_interval_timer_trigger(wq);
-               }
-       }
-       return ret;
-}
-
-
-static int
-wqops_thread_return(struct proc *p, struct workqueue *wq)
-{
-       thread_t th = current_thread();
-       struct uthread *uth = pthread_kern->get_bsdthread_info(th);
-       struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
-
-       /* reset signal mask on the workqueue thread to default state */
-       if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) {
-               pthread_kern->proc_lock(p);
-               pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask);
-               pthread_kern->proc_unlock(p);
-       }
-
-       if (wq == NULL || !tl) {
-               return EINVAL;
-       }
-
-       PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_START, tl->th_workq, 0, 0, 0, 0);
-
-       /*
-        * This squash call has neat semantics: it removes the specified overrides,
-        * replacing the current requested QoS with the previous effective QoS from
-        * those overrides.  This means we won't be preempted due to having our QoS
-        * lowered.  Of course, now our understanding of the thread's QoS is wrong,
-        * so we'll adjust below.
-        */
-       bool was_manager = (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
-       int new_qos;
-
-       if (!was_manager) {
-               new_qos = pthread_kern->proc_usynch_thread_qos_squash_override_for_resource(th,
-                               THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD,
-                               THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
-       }
-
-       PTHREAD_TRACE_WQ(TRACE_wq_runitem | DBG_FUNC_END, wq, tl->th_priority, 0, 0, 0);
-
-       workqueue_lock_spin(wq);
-
-       if (tl->th_flags & TH_LIST_KEVENT_BOUND) {
-               unsigned int flags = KEVENT_FLAG_WORKQ;
-               if (was_manager) {
-                       flags |= KEVENT_FLAG_WORKQ_MANAGER;
-               }
-
-               tl->th_flags |= TH_LIST_UNBINDING;
-               workqueue_unlock(wq);
-               kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, flags);
-               if (!(tl->th_flags & TH_LIST_UNBINDING)) {
-                       _setup_wqthread(p, th, wq, tl, WQ_SETUP_CLEAR_VOUCHER);
-                       pthread_kern->unix_syscall_return(EJUSTRETURN);
-                       __builtin_unreachable();
-               }
-               workqueue_lock_spin(wq);
-               tl->th_flags &= ~(TH_LIST_KEVENT_BOUND | TH_LIST_UNBINDING);
-       }
-
-       if (!was_manager) {
-               /* Fix up counters from the squash operation. */
-               uint8_t old_bucket = tl->th_priority;
-               uint8_t new_bucket = thread_qos_get_class_index(new_qos);
-
-               if (old_bucket != new_bucket) {
-                       _wq_thactive_move(wq, old_bucket, new_bucket);
-                       wq->wq_thscheduled_count[old_bucket]--;
-                       wq->wq_thscheduled_count[new_bucket]++;
-
-                       PTHREAD_TRACE_WQ(TRACE_wq_thread_squash | DBG_FUNC_NONE, wq, tl->th_priority, new_bucket, 0, 0);
-                       tl->th_priority = new_bucket;
-                       PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_END, tl->th_workq, new_qos, 0, 0, 0);
-               }
-       }
-
-       workqueue_run_threadreq_and_unlock(p, wq, tl, NULL, false);
-       return 0;
-}
-
-/**
- * Multiplexed call to interact with the workqueue mechanism
- */
-int
-_workq_kernreturn(struct proc *p,
-                 int options,
-                 user_addr_t item,
-                 int arg2,
-                 int arg3,
-                 int32_t *retval)
-{
-       struct workqueue *wq;
-       int error = 0;
-
-       if (pthread_kern->proc_get_register(p) == 0) {
-               return EINVAL;
-       }
-
-       switch (options) {
-       case WQOPS_QUEUE_NEWSPISUPP: {
-               /*
-                * arg2 = offset of serialno into dispatch queue
-                * arg3 = kevent support
-                */
-               int offset = arg2;
-               if (arg3 & 0x01){
-                       // If we get here, then userspace has indicated support for kevent delivery.
-               }
-
-               pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset);
-               break;
-       }
-       case WQOPS_QUEUE_REQTHREADS: {
-               /*
-                * arg2 = number of threads to start
-                * arg3 = priority
-                */
-               error = wqops_queue_reqthreads(p, arg2, arg3);
-               break;
-       }
-       case WQOPS_SET_EVENT_MANAGER_PRIORITY: {
-               /*
-                * arg2 = priority for the manager thread
-                *
-                * if _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG is set, the
-                * ~_PTHREAD_PRIORITY_FLAGS_MASK contains a scheduling priority instead
-                * of a QOS value
-                */
-               pthread_priority_t pri = arg2;
-
-               wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
-               if (wq == NULL) {
-                       error = EINVAL;
-                       break;
-               }
-               workqueue_lock_spin(wq);
-               if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){
-                       /*
-                        * If userspace passes a scheduling priority, that takes precedence
-                        * over any QoS.  (So, userspace should take care not to accidentally
-                        * lower the priority this way.)
-                        */
-                       uint32_t sched_pri = pri & _PTHREAD_PRIORITY_SCHED_PRI_MASK;
-                       if (wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){
-                               wq->wq_event_manager_priority = MAX(sched_pri, wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_MASK)
-                                               | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-                       } else {
-                               wq->wq_event_manager_priority = sched_pri
-                                               | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-                       }
-               } else if ((wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
-                       int cur_qos = pthread_priority_get_thread_qos(wq->wq_event_manager_priority);
-                       int new_qos = pthread_priority_get_thread_qos(pri);
-                       wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(MAX(cur_qos, new_qos)) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-               }
-               workqueue_unlock(wq);
-               break;
-       }
-       case WQOPS_THREAD_KEVENT_RETURN:
-       case WQOPS_THREAD_WORKLOOP_RETURN:
-               wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
-               PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, options, 0, 0, 0);
-               if (item != 0 && arg2 != 0) {
-                       int32_t kevent_retval;
-                       int ret;
-                       if (options == WQOPS_THREAD_KEVENT_RETURN) {
-                               ret = kevent_qos_internal(p, -1, item, arg2, item, arg2, NULL, NULL,
-                                               KEVENT_FLAG_WORKQ | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS,
-                                               &kevent_retval);
-                       } else /* options == WQOPS_THREAD_WORKLOOP_RETURN */ {
-                               kqueue_id_t kevent_id = -1;
-                               ret = kevent_id_internal(p, &kevent_id, item, arg2, item, arg2,
-                                               NULL, NULL,
-                                               KEVENT_FLAG_WORKLOOP | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS,
-                                               &kevent_retval);
-                       }
-                       /*
-                        * We shouldn't be getting more errors out than events we put in, so
-                        * reusing the input buffer should always provide enough space.  But
-                        * the assert is commented out since we get errors in edge cases in the
-                        * process lifecycle.
-                        */
-                       //assert(ret == KERN_SUCCESS && kevent_retval >= 0);
-                       if (ret != KERN_SUCCESS){
-                               error = ret;
-                               break;
-                       } else if (kevent_retval > 0){
-                               assert(kevent_retval <= arg2);
-                               *retval = kevent_retval;
-                               error = 0;
-                               break;
-                       }
-               }
-               goto thread_return;
-
-       case WQOPS_THREAD_RETURN:
-               wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
-               PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, options, 0, 0, 0);
-       thread_return:
-               error = wqops_thread_return(p, wq);
-               // NOT REACHED except in case of error
-               assert(error);
-               break;
-
-       case WQOPS_SHOULD_NARROW: {
-               /*
-                * arg2 = priority to test
-                * arg3 = unused
-                */
-               pthread_priority_t priority = arg2;
-               thread_t th = current_thread();
-               struct threadlist *tl = util_get_thread_threadlist_entry(th);
-
-               if (tl == NULL || (tl->th_flags & TH_LIST_CONSTRAINED) == 0) {
-                       error = EINVAL;
-                       break;
-               }
-
-               int class = pthread_priority_get_class_index(priority);
-               wq = tl->th_workq;
-               workqueue_lock_spin(wq);
-               bool should_narrow = !may_start_constrained_thread(wq, class, tl, false);
-               workqueue_unlock(wq);
-
-               *retval = should_narrow;
-               break;
-       }
-       default:
-               error = EINVAL;
-               break;
-       }
-
-       switch (options) {
-       case WQOPS_THREAD_KEVENT_RETURN:
-       case WQOPS_THREAD_WORKLOOP_RETURN:
-       case WQOPS_THREAD_RETURN:
-               PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, options, 0, 0, 0);
-               break;
-       }
-       return (error);
-}
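
/*
 * A minimal sketch of the WQOPS_SET_EVENT_MANAGER_PRIORITY merge rule
 * above, with a hypothetical bit layout: an explicit scheduling priority
 * always takes precedence over a QoS, and within either representation
 * the manager priority only ever moves up (MAX), never down.
 */
#include <stdint.h>

#define PP_SCHED_PRI_FLAG  0x20000000u  /* hypothetical flag bits */
#define PP_EVENT_MGR_FLAG  0x10000000u
#define PP_SCHED_PRI_MASK  0x0000ffffu

/* hypothetical QoS encode/decode; the real code goes through
 * pthread_priority_get_thread_qos()/thread_qos_get_pthread_priority() */
static uint32_t pp_get_qos(uint32_t pp)  { return pp & 0xffu; }
static uint32_t pp_from_qos(uint32_t q)  { return q & 0xffu; }

static uint32_t
wq_merge_manager_priority(uint32_t cur, uint32_t incoming)
{
        if (incoming & PP_SCHED_PRI_FLAG) {
                uint32_t new_pri = incoming & PP_SCHED_PRI_MASK;
                if (cur & PP_SCHED_PRI_FLAG) {
                        uint32_t cur_pri = cur & PP_SCHED_PRI_MASK;
                        if (cur_pri > new_pri)
                                new_pri = cur_pri;  /* never lower the manager */
                }
                return new_pri | PP_SCHED_PRI_FLAG | PP_EVENT_MGR_FLAG;
        }
        if (cur & PP_SCHED_PRI_FLAG) {
                return cur;  /* a QoS never overrides an explicit sched pri */
        }
        uint32_t qos = pp_get_qos(incoming), cur_qos = pp_get_qos(cur);
        return pp_from_qos(qos > cur_qos ? qos : cur_qos) | PP_EVENT_MGR_FLAG;
}
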
-
-/*
- * We have no work to do, park ourselves on the idle list.
- *
- * Consumes the workqueue lock and does not return.
- */
-static void __dead2
-parkit(struct workqueue *wq, struct threadlist *tl, thread_t thread)
-{
-       assert(thread == tl->th_thread);
-       assert(thread == current_thread());
-
-       PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_START, wq, 0, 0, 0, 0);
-
-       uint32_t us_to_wait = 0;
-
-       TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
-
-       tl->th_flags &= ~TH_LIST_RUNNING;
-       tl->th_flags &= ~TH_LIST_KEVENT;
-       assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
-
-       if (tl->th_flags & TH_LIST_CONSTRAINED) {
-               wq->wq_constrained_threads_scheduled--;
-               tl->th_flags &= ~TH_LIST_CONSTRAINED;
-       }
-
-       _wq_thactive_dec(wq, tl->th_priority);
-       wq->wq_thscheduled_count[tl->th_priority]--;
-       wq->wq_threads_scheduled--;
-       uint32_t thidlecount = ++wq->wq_thidlecount;
-
-       pthread_kern->thread_sched_call(thread, NULL);
-
-       /*
-        * We'd like to always have one manager thread parked so that we can have
-        * low latency when we need to bring a manager thread up.  If that idle
-        * thread list is empty, make this thread a manager thread.
-        *
-        * XXX: This doesn't check that there's not a manager thread outstanding,
-        * so it's based on the assumption that most manager callouts will change
-        * their QoS before parking.  If that stops being true, this may end up
-        * costing us more than we gain.
-        */
-       if (TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
-                       tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET){
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE,
-                                       wq, thread_tid(thread),
-                                       (tl->th_priority << 16) | WORKQUEUE_EVENT_MANAGER_BUCKET, 2, 0);
-               reset_priority(tl, pthread_priority_from_wq_class_index(wq, WORKQUEUE_EVENT_MANAGER_BUCKET));
-               tl->th_priority = WORKQUEUE_EVENT_MANAGER_BUCKET;
-       }
-
-       if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){
-               TAILQ_INSERT_HEAD(&wq->wq_thidlemgrlist, tl, th_entry);
-       } else {
-               TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);
-       }
-
-       /*
-        * When we remove the voucher from the thread, we may lose our importance
-        * causing us to get preempted, so we do this after putting the thread on
-        * the idle list.  That way, when we get our importance back we'll be able
-        * to use this thread from e.g. the kevent call out to deliver a boosting
-        * message.
-        */
-       tl->th_flags |= TH_LIST_REMOVING_VOUCHER;
-       workqueue_unlock(wq);
-       if (pthread_kern->thread_will_park_or_terminate) {
-               pthread_kern->thread_will_park_or_terminate(tl->th_thread);
-       }
-       __assert_only kern_return_t kr;
-       kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
-       assert(kr == KERN_SUCCESS);
-       workqueue_lock_spin(wq);
-       tl->th_flags &= ~(TH_LIST_REMOVING_VOUCHER);
-
-       if ((tl->th_flags & TH_LIST_RUNNING) == 0) {
-               if (thidlecount < 101) {
-                       us_to_wait = wq_reduce_pool_window_usecs - ((thidlecount-2) * (wq_reduce_pool_window_usecs / 100));
-               } else {
-                       us_to_wait = wq_reduce_pool_window_usecs / 100;
-               }
-
-               thread_set_pending_block_hint(thread, kThreadWaitParkedWorkQueue);
-               assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE),
-                               TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait,
-                               wq_reduce_pool_window_usecs/10, NSEC_PER_USEC);
-
-               workqueue_unlock(wq);
-
-               thread_block(wq_unpark_continue);
-               panic("thread_block(wq_unpark_continue) returned!");
-       } else {
-               workqueue_unlock(wq);
-
-               /*
-                * While we had dropped the lock to unset our voucher, someone came
-                * around and made us runnable.  But because we weren't waiting on the
-                * event, their wakeup() was ineffectual.  To correct for that, we just
-                * run the continuation ourselves.
-                */
-               wq_unpark_continue(NULL, THREAD_AWAKENED);
-       }
-}
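
/*
 * A minimal sketch of the idle-timeout computation in parkit() above:
 * the more idle threads there are, the sooner the parked thread times
 * out, decaying linearly from the full reduce-pool window (thidlecount
 * == 2) down to 1% of it (thidlecount >= 101).  Note that for
 * thidlecount == 1 the unsigned arithmetic wraps, yielding window +
 * window/100, i.e. the first idle thread waits slightly longer than the
 * full window.
 */
#include <stdint.h>

static uint32_t
wq_park_timeout_usecs(uint32_t thidlecount, uint32_t window_usecs)
{
        if (thidlecount < 101)
                return window_usecs - ((thidlecount - 2) * (window_usecs / 100));
        return window_usecs / 100;
}
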
-
-static bool
-may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass,
-               struct threadlist *tl, bool may_start_timer)
-{
-       uint32_t req_qos = _wq_thactive_best_constrained_req_qos(wq);
-       wq_thactive_t thactive;
-
-       if (may_start_timer && at_priclass < req_qos) {
-               /*
-                * When called from workqueue_run_threadreq_and_unlock(), pre-post the
-                * newer, higher priority into the thactive state so that
-                * workqueue_callback() makes the right decision.
-                *
-                * If the admission check passes, workqueue_run_threadreq_and_unlock
-                * will reset this value before running the request.
-                */
-               thactive = _wq_thactive_set_best_constrained_req_qos(wq, req_qos,
-                               at_priclass);
-#ifdef __LP64__
-               PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 1, (uint64_t)thactive,
-                               (uint64_t)(thactive >> 64), 0, 0);
-#endif
-       } else {
-               thactive = _wq_thactive(wq);
-       }
-
-       uint32_t constrained_threads = wq->wq_constrained_threads_scheduled;
-       if (tl && (tl->th_flags & TH_LIST_CONSTRAINED)) {
-               /*
-                * don't count the current thread as scheduled
-                */
-               constrained_threads--;
-       }
-       if (constrained_threads >= wq_max_constrained_threads) {
-               PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 1,
-                               wq->wq_constrained_threads_scheduled,
-                               wq_max_constrained_threads, 0);
-               /*
-                * we need 1 or more constrained threads to return to the kernel before
-                * we can dispatch additional work
-                */
-               return false;
-       }
-
-       /*
-        * Compute a metric for how many threads are active.  We find the
-        * highest priority request outstanding and then add up the number of
-        * active threads in that and all higher-priority buckets.  We'll also add
-        * any "busy" threads which are not active but blocked recently enough that
-        * we can't be sure they've gone idle yet.  We'll then compare this metric
-        * to our max concurrency to decide whether to add a new thread.
-        */
-
-       uint32_t busycount, thactive_count;
-
-       thactive_count = _wq_thactive_aggregate_downto_qos(wq, thactive,
-                       at_priclass, &busycount, NULL);
-
-       if (tl && tl->th_priority <= at_priclass) {
-               /*
-                * don't count this thread as currently active
-                */
-               assert(thactive_count > 0);
-               thactive_count--;
-       }
-
-       if (thactive_count + busycount < wq_max_concurrency[at_priclass]) {
-               PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 2,
-                               thactive_count, busycount, 0);
-               return true;
-       } else {
-               PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 3,
-                               thactive_count, busycount, 0);
-       }
-
-       if (busycount && may_start_timer) {
-               /*
-                * If this is called from the add timer, we won't have another timer
-                * fire when the thread exits the "busy" state, so rearm the timer.
-                */
-               if (WQ_TIMER_DELAYED_NEEDED(wq)) {
-                       workqueue_interval_timer_start(wq);
-               }
-       }
-
-       return false;
-}
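
/*
 * A minimal sketch of the two admission checks in
 * may_start_constrained_thread() above, with simplified inputs: the
 * request is rejected once the constrained pool cap is hit, and is
 * admitted only while active plus recently-busy threads at or above the
 * request's priority stay under the bucket's configured concurrency.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
wq_constrained_admission(uint32_t constrained_scheduled,
                uint32_t max_constrained, uint32_t thactive_count,
                uint32_t busycount, uint32_t max_concurrency)
{
        if (constrained_scheduled >= max_constrained)
                return false;  /* need threads to return to the kernel first */
        return (thactive_count + busycount) < max_concurrency;
}
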
-
-static struct threadlist *
-pop_from_thidlelist(struct workqueue *wq, uint32_t priclass)
-{
-       assert(wq->wq_thidlecount);
-
-       struct threadlist *tl = NULL;
-
-       if (!TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
-                       (priclass == WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlelist))){
-               tl = TAILQ_FIRST(&wq->wq_thidlemgrlist);
-               TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
-               assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
-       } else if (!TAILQ_EMPTY(&wq->wq_thidlelist) &&
-                       (priclass != WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlemgrlist))){
-               tl = TAILQ_FIRST(&wq->wq_thidlelist);
-               TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
-               assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
-       } else {
-               panic("pop_from_thidlelist called with no threads available");
-       }
-       assert((tl->th_flags & TH_LIST_RUNNING) == 0);
-
-       assert(wq->wq_thidlecount);
-       wq->wq_thidlecount--;
-
-       TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);
-
-       tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;
-
-       wq->wq_threads_scheduled++;
-       wq->wq_thscheduled_count[priclass]++;
-       _wq_thactive_inc(wq, priclass);
-       return tl;
-}
-
-static pthread_priority_t
-pthread_priority_from_wq_class_index(struct workqueue *wq, int index)
-{
-       if (index == WORKQUEUE_EVENT_MANAGER_BUCKET){
-               return wq->wq_event_manager_priority;
-       } else {
-               return class_index_get_pthread_priority(index);
-       }
-}
-
-static void
-reset_priority(struct threadlist *tl, pthread_priority_t pri)
-{
-       kern_return_t ret;
-       thread_t th = tl->th_thread;
-
-       if ((pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
-               ret = pthread_kern->thread_set_workq_qos(th, pthread_priority_get_thread_qos(pri), 0);
-               assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
-
-               if (tl->th_flags & TH_LIST_EVENT_MGR_SCHED_PRI) {
-
-                       /* Reset priority to default (masked by QoS) */
-
-                       ret = pthread_kern->thread_set_workq_pri(th, 31, POLICY_TIMESHARE);
-                       assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
-
-                       tl->th_flags &= ~TH_LIST_EVENT_MGR_SCHED_PRI;
-               }
-       } else {
-               ret = pthread_kern->thread_set_workq_qos(th, THREAD_QOS_UNSPECIFIED, 0);
-               assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
-               ret = pthread_kern->thread_set_workq_pri(th, (pri & (~_PTHREAD_PRIORITY_FLAGS_MASK)), POLICY_TIMESHARE);
-               assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
-
-               tl->th_flags |= TH_LIST_EVENT_MGR_SCHED_PRI;
-       }
-}
-
-/*
- * Picks the best request to run, and returns the best overcommit fallback
- * if the best pick is non-overcommit and risks failing its admission check.
- */
-static struct threadreq *
-workqueue_best_threadreqs(struct workqueue *wq, struct threadlist *tl,
-               struct threadreq **fallback)
-{
-       struct threadreq *req, *best_req = NULL;
-       int priclass, prilimit;
-
-       if ((wq->wq_event_manager_threadreq.tr_state == TR_STATE_WAITING) &&
-                       ((wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0) ||
-                       (tl && tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) {
-               /*
-                * There's an event manager request and either:
-                *   - no event manager currently running
-                *   - we are re-using the event manager
-                */
-               req = &wq->wq_event_manager_threadreq;
-               PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, req, 1, 0, 0);
-               return req;
-       }
-
-       if (tl) {
-               prilimit = WORKQUEUE_EVENT_MANAGER_BUCKET;
-       } else {
-               prilimit = _wq_highest_paced_priority(wq);
-       }
-       for (priclass = 0; priclass < prilimit; priclass++) {
-               req = TAILQ_FIRST(&wq->wq_overcommit_reqlist[priclass]);
-               if (req) {
-                       PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, req, 2, 0, 0);
-                       if (best_req) {
-                               *fallback = req;
-                       } else {
-                               best_req = req;
-                       }
-                       break;
-               }
-               if (!best_req) {
-                       best_req = TAILQ_FIRST(&wq->wq_reqlist[priclass]);
-                       if (best_req) {
-                               PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, best_req, 3, 0, 0);
-                       }
-               }
-       }
-       return best_req;
-}
-
-/**
- * Runs a thread request on a thread
- *
- * - if thread is THREAD_NULL, will find a thread and run the request there.
- *   Otherwise, the thread must be the current thread.
- *
- * - if req is NULL, will find the highest priority request and run that.  If
- *   it is not NULL, it must be a threadreq object in state NEW.  If it can not
- *   be run immediately, it will be enqueued and moved to state WAITING.
- *
- *   Either way, the thread request object serviced will be moved to state
- *   PENDING and attached to the threadlist.
- *
- *   Should be called with the workqueue lock held.  Will drop it.
- *
- *   WARNING: _workq_kevent_reqthreads needs to be able to preflight any
- *   admission checks in this function.  If you are changing this function,
- *   keep that one up-to-date.
- *
- * - if parking_tl is non NULL, then the current thread is parking. This will
- *   try to reuse this thread for a request. If no match is found, it will be
- *   parked.
- */
-static int
-workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq,
-               struct threadlist *parking_tl, struct threadreq *req,
-               bool may_add_new_thread)
-{
-       struct threadreq *incoming_req = req;
-
-       struct threadlist *tl = parking_tl;
-       int rc = WQ_RUN_TR_THROTTLED;
-
-       assert(tl == NULL || tl->th_thread == current_thread());
-       assert(req == NULL || req->tr_state == TR_STATE_NEW);
-       assert(!may_add_new_thread || !tl);
-
-       PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq | DBG_FUNC_START, wq, req,
-                       tl ? thread_tid(tl->th_thread) : 0,
-                       req ? (req->tr_priority << 16 | req->tr_flags) : 0, 0);
-
-       /*
-        * Special cases when provided an event manager request
-        */
-       if (req && req->tr_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-               // Clients must not rely on identity of event manager requests
-               assert(req->tr_flags & TR_FLAG_ONSTACK);
-               // You can't be both overcommit and event manager
-               assert((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0);
-
-               /*
-                * We can only ever have one event manager request, so coalesce them if
-                * there's already one outstanding.
-                */
-               if (wq->wq_event_manager_threadreq.tr_state == TR_STATE_WAITING) {
-                       PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_mgr_merge | DBG_FUNC_NONE, wq, req, 0, 0, 0);
-
-                       struct threadreq *existing_req = &wq->wq_event_manager_threadreq;
-                       if (req->tr_flags & TR_FLAG_KEVENT) {
-                               existing_req->tr_flags |= TR_FLAG_KEVENT;
-                       }
-
-                       req = existing_req;
-                       incoming_req = NULL;
-               }
-
-               if (wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] &&
-                               (!tl || tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET)){
-                       /*
-                        * There can only be one event manager running at a time.
-                        */
-                       PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 1, 0, 0, 0);
-                       goto done;
-               }
-       }
-
-again: // Start again after creating a thread
-
-       if (_wq_exiting(wq)) {
-               rc = WQ_RUN_TR_EXITING;
-               goto exiting;
-       }
-
-       /*
-        * Thread request selection and admission control
-        */
-       struct threadreq *fallback = NULL;
-       if (req) {
-               if ((req->tr_flags & TR_FLAG_NO_PACING) == 0 &&
-                               _wq_should_pace_priority(wq, req->tr_priority)) {
-                       /*
-                        * If a request fails the pacing admission check, then thread
-                        * requests are redriven when the pacing thread is finally scheduled
-                        * when it calls _wq_pacing_end() in wq_unpark_continue().
-                        */
-                       goto done;
-               }
-       } else if (wq->wq_reqcount == 0) {
-               PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 2, 0, 0, 0);
-               goto done;
-       } else if ((req = workqueue_best_threadreqs(wq, tl, &fallback)) == NULL) {
-               PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 3, 0, 0, 0);
-               goto done;
-       }
-
-       if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0 &&
-                       (req->tr_priority < WORKQUEUE_EVENT_MANAGER_BUCKET)) {
-               if (!may_start_constrained_thread(wq, req->tr_priority, parking_tl, true)) {
-                       if (!fallback) {
-                               PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 4, 0, 0, 0);
-                               goto done;
-                       }
-                       assert(req->tr_state == TR_STATE_WAITING);
-                       req = fallback;
-               }
-       }
-
-       /*
-        * Thread selection.
-        */
-       if (parking_tl) {
-               if (tl->th_priority != req->tr_priority) {
-                       _wq_thactive_move(wq, tl->th_priority, req->tr_priority);
-                       wq->wq_thscheduled_count[tl->th_priority]--;
-                       wq->wq_thscheduled_count[req->tr_priority]++;
-               }
-               PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE,
-                               wq, 1, thread_tid(tl->th_thread), 0, 0);
-       } else if (wq->wq_thidlecount) {
-               tl = pop_from_thidlelist(wq, req->tr_priority);
-               /*
-                * This call will update wq_thscheduled_count and wq_thactive_count for
-                * the provided priority.  It will not set the returned thread to that
-                * priority.  This matches the behavior of the parking_tl clause above.
-                */
-               PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE,
-                               wq, 2, thread_tid(tl->th_thread), 0, 0);
-       } else /* no idle threads */ {
-               if (!may_add_new_thread || wq->wq_nthreads >= wq_max_threads) {
-                       PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 5,
-                                       may_add_new_thread, wq->wq_nthreads, 0);
-                       if (wq->wq_nthreads < wq_max_threads) {
-                               rc = WQ_RUN_TR_THREAD_NEEDED;
-                       }
-                       goto done;
-               }
-
-               bool added_thread = workqueue_addnewthread(p, wq);
-               /*
-                * workqueue_addnewthread will drop and re-take the lock, so we
-                * need to ensure we still have a cached request.
-                *
-                * It also means we have to pick a new request, since our old pick may
-                * not be valid anymore.
-                */
-               req = incoming_req;
-               if (req && (req->tr_flags & TR_FLAG_ONSTACK)) {
-                       _threadreq_copy_prepare(wq);
-               }
-
-               if (added_thread) {
-                       PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE,
-                                       wq, 3, 0, 0, 0);
-                       goto again;
-               } else if (_wq_exiting(wq)) {
-                       rc = WQ_RUN_TR_EXITING;
-                       goto exiting;
-               } else {
-                       PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 6, 0, 0, 0);
-                       /*
-                        * Something caused thread creation to fail.  Kick off the timer in
-                        * the hope that it'll succeed next time.
-                        */
-                       if (WQ_TIMER_DELAYED_NEEDED(wq)) {
-                               workqueue_interval_timer_start(wq);
-                       }
-                       goto done;
-               }
-       }
-
-       /*
-        * Setup thread, mark request as complete and run with it.
-        */
-       if (req->tr_state == TR_STATE_WAITING) {
-               _threadreq_dequeue(wq, req);
-       }
-       if (tl->th_priority != req->tr_priority) {
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE,
-                                       wq, thread_tid(tl->th_thread),
-                                       (tl->th_priority << 16) | req->tr_priority, 1, 0);
-               reset_priority(tl, pthread_priority_from_wq_class_index(wq, req->tr_priority));
-               tl->th_priority = (uint8_t)req->tr_priority;
-       }
-       if (req->tr_flags & TR_FLAG_OVERCOMMIT) {
-               if ((tl->th_flags & TH_LIST_CONSTRAINED) != 0) {
-                       tl->th_flags &= ~TH_LIST_CONSTRAINED;
-                       wq->wq_constrained_threads_scheduled--;
-               }
-       } else {
-               if ((tl->th_flags & TH_LIST_CONSTRAINED) == 0) {
-                       tl->th_flags |= TH_LIST_CONSTRAINED;
-                       wq->wq_constrained_threads_scheduled++;
-               }
-       }
-
-       if (!parking_tl && !(req->tr_flags & TR_FLAG_NO_PACING)) {
-               _wq_pacing_start(wq, tl);
-       }
-       if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0) {
-               uint32_t old_qos, new_qos;
-
-               /*
-                * If we are scheduling a constrained thread request, we may need to
-                * update the best constrained qos in the thactive atomic state.
-                */
-               for (new_qos = 0; new_qos < WQ_THACTIVE_NO_PENDING_REQUEST; new_qos++) {
-                       if (TAILQ_FIRST(&wq->wq_reqlist[new_qos]))
-                               break;
-               }
-               old_qos = _wq_thactive_best_constrained_req_qos(wq);
-               if (old_qos != new_qos) {
-                       wq_thactive_t v = _wq_thactive_set_best_constrained_req_qos(wq,
-                                       old_qos, new_qos);
-#ifdef __LP64__
-                       PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 2, (uint64_t)v,
-                                       (uint64_t)(v >> 64), 0, 0);
-#else
-                       PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 2, v, 0, 0, 0);
-#endif
-               }
-       }
-       {
-               uint32_t upcall_flags = WQ_FLAG_THREAD_NEWSPI;
-               if (req->tr_flags & TR_FLAG_OVERCOMMIT)
-                       upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT;
-               if (req->tr_flags & TR_FLAG_KEVENT)
-                       upcall_flags |= WQ_FLAG_THREAD_KEVENT;
-               if (req->tr_flags & TR_FLAG_WORKLOOP)
-                       upcall_flags |= WQ_FLAG_THREAD_WORKLOOP | WQ_FLAG_THREAD_KEVENT;
-               if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET)
-                       upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER;
-               tl->th_upcall_flags = upcall_flags >> WQ_FLAG_THREAD_PRIOSHIFT;
-       }
-       if (req->tr_flags & TR_FLAG_KEVENT) {
-               tl->th_flags |= TH_LIST_KEVENT;
-       } else {
-               tl->th_flags &= ~TH_LIST_KEVENT;
-       }
-       return _threadreq_complete_and_unlock(p, wq, req, tl);
-
-done:
-       if (incoming_req) {
-               _threadreq_enqueue(wq, incoming_req);
-       }
-
-exiting:
-
-       if (parking_tl && !(parking_tl->th_flags & TH_LIST_UNBINDING)) {
-               parkit(wq, parking_tl, parking_tl->th_thread);
-               __builtin_unreachable();
-       }
-
-       workqueue_unlock(wq);
-
-       return rc;
-}
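
/*
 * A minimal sketch of the thread request lifecycle implemented above
 * (state names mirror the TR_STATE_* values used in this file): a NEW
 * request either runs immediately or is enqueued as WAITING; the request
 * finally serviced moves to PENDING while attached to a threadlist, and
 * cancellation or teardown paths mark requests DEAD.
 */
enum tr_state_sketch {
        TR_SKETCH_NEW,      /* initialized, not yet enqueued */
        TR_SKETCH_WAITING,  /* enqueued on a request list, awaiting a thread */
        TR_SKETCH_PENDING,  /* selected: attached to a threadlist being set up */
        TR_SKETCH_DEAD,     /* cancelled, or smashed during exit */
};
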
-
-/**
- * parked thread wakes up
- */
-static void __dead2
-wq_unpark_continue(void* __unused ptr, wait_result_t wait_result)
-{
-       boolean_t first_use = false;
-       thread_t th = current_thread();
-       proc_t p = current_proc();
-
-       struct uthread *uth = pthread_kern->get_bsdthread_info(th);
-       if (uth == NULL) goto done;
-
-       struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
-       if (wq == NULL) goto done;
-
-       workqueue_lock_spin(wq);
-
-       struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
-       assert(tl != WQ_THREADLIST_EXITING_POISON);
-       if (tl == NULL) {
-               /*
-                * We woke up before addnewthread() was finished setting us up.  Go
-                * ahead and exit, but before we do, poison the threadlist variable so
-                * that addnewthread() doesn't still think we are valid.
-                */
-               pthread_kern->uthread_set_threadlist(uth, WQ_THREADLIST_EXITING_POISON);
-               workqueue_unlock(wq);
-               goto done;
-       }
-
-       assert(tl->th_flags & TH_LIST_INITED);
-
-       if ((tl->th_flags & TH_LIST_NEW)){
-               tl->th_flags &= ~(TH_LIST_NEW);
-               first_use = true;
-       }
-
-       if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
-               /*
-                * The normal wakeup path.
-                */
-               goto return_to_user;
-       }
-
-       if ((tl->th_flags & TH_LIST_RUNNING) == 0 &&
-                       wait_result == THREAD_TIMED_OUT &&
-                       tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET &&
-                       TAILQ_FIRST(&wq->wq_thidlemgrlist) == tl &&
-                       TAILQ_NEXT(tl, th_entry) == NULL){
-               /*
-                * If we are the only idle manager and we popped for self-destruction,
-                * then don't actually exit.  Instead, free our stack to save some
-                * memory and re-park.
-                */
-
-               workqueue_unlock(wq);
-
-               vm_map_t vmap = wq->wq_map;
-
-               // Keep this in sync with _setup_wqthread()
-               const vm_size_t       guardsize = vm_map_page_size(vmap);
-               const user_addr_t     freeaddr = (user_addr_t)tl->th_stackaddr + guardsize;
-               const vm_map_offset_t freesize = vm_map_trunc_page_mask((PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1, vm_map_page_mask(vmap)) - guardsize;
-
-               __assert_only int kr = mach_vm_behavior_set(vmap, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
-#if MACH_ASSERT
-               if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
-                       os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
-               }
-#endif
-
-               workqueue_lock_spin(wq);
-
-               if ( !(tl->th_flags & TH_LIST_RUNNING)) {
-                       thread_set_pending_block_hint(th, kThreadWaitParkedWorkQueue);
-                       assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE));
-
-                       workqueue_unlock(wq);
-
-                       thread_block(wq_unpark_continue);
-                       __builtin_unreachable();
-               }
-       }
-
-       if ((tl->th_flags & TH_LIST_RUNNING) == 0) {
-               assert((tl->th_flags & TH_LIST_BUSY) == 0);
-               if (!first_use) {
-                       PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, 0, 0, 0, 0);
-               }
-               /*
-                * We were set running, but not for the purposes of actually running.
-                * This could be because the timer elapsed.  Or it could be because the
-                * thread aborted.  Either way, we need to return to userspace to exit.
-                *
-                * The call to workqueue_removethread will consume the lock.
-                */
-
-               if (!first_use &&
-                               (tl->th_priority < qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS) ||
-                               (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) {
-                       // Reset the QoS to something low for the pthread cleanup
-                       PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE,
-                                               wq, thread_tid(th),
-                                               (tl->th_priority << 16) | qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS), 3, 0);
-                       pthread_priority_t cleanup_pri = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0);
-                       reset_priority(tl, cleanup_pri);
-               }
-
-               workqueue_removethread(tl, 0, first_use);
-
-               if (first_use){
-                       pthread_kern->thread_bootstrap_return();
-               } else {
-                       pthread_kern->unix_syscall_return(0);
-               }
-               __builtin_unreachable();
-       }
-
-       /*
-        * The timer woke us up or the thread was aborted.  However, we have
-        * already started to make this a runnable thread.  Wait for that to
-        * finish, then continue to userspace.
-        */
-       while ((tl->th_flags & TH_LIST_BUSY)) {
-               assert_wait((caddr_t)tl, (THREAD_UNINT));
-
-               workqueue_unlock(wq);
-
-               thread_block(THREAD_CONTINUE_NULL);
-
-               workqueue_lock_spin(wq);
-       }
-
-return_to_user:
-       if (!first_use) {
-               PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, 0, 0, 0, 0);
-       }
-       if (_wq_pacing_end(wq, tl) && wq->wq_reqcount) {
-               workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
-       } else {
-               workqueue_unlock(wq);
-       }
-       _setup_wqthread(p, th, wq, tl, first_use ? WQ_SETUP_FIRST_USE : 0);
-       pthread_kern->thread_sched_call(th, workqueue_callback);
-done:
-       if (first_use){
-               pthread_kern->thread_bootstrap_return();
-       } else {
-               pthread_kern->unix_syscall_return(EJUSTRETURN);
-       }
-       panic("Our attempt to return to userspace failed...");
-}
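
The (removed) idle event-manager path above keeps the parked thread but marks its stack VM_BEHAVIOR_REUSABLE so the pager may reclaim the pages while it stays parked. A minimal user-space analogue of that idea using madvise(2), with an anonymous mapping standing in for the thread stack (sizes are illustrative only):

    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int
    main(void)
    {
            size_t len = 16 * (size_t)getpagesize();
            void *stack = mmap(NULL, len, PROT_READ | PROT_WRITE,
                            MAP_ANON | MAP_PRIVATE, -1, 0);
            if (stack == MAP_FAILED) {
                    return 1;
            }

            /* ... thread runs on this stack, then parks ... */

            /* contents are disposable; the mapping itself is kept */
            if (madvise(stack, len, MADV_FREE) != 0) {
                    perror("madvise");
            }

            munmap(stack, len);
            return 0;
    }
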
+       if (kevent_data_available == WQ_KEVENT_DATA_SIZE) {
+               workq_thread_set_top_addr(th_addrs, kevent_id_addr);
+       } else {
+               workq_thread_set_top_addr(th_addrs,
+                               kevent_data_buf + kevent_data_available);
+       }
+       *kevent_count_out = kevent_count;
+       *kevent_list_out = kevent_list;
+       return ret;
+}
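
The tail of the kevent helper above (workq_kevent, judging by its call sites below) chooses the thread's new stack top: when no out-of-line kevent data was copied out, the stack may run right up to the kevent-id slot; otherwise it must start below the data bytes actually used. A sketch of that computation under assumed values; WQ_KEVENT_DATA_SIZE and the 16-byte alignment here are illustrative, and workq_thread_set_top_addr is presumed to handle the final alignment itself:

    #include <stdint.h>
    #include <stdio.h>

    #define WQ_KEVENT_DATA_SIZE (32 * 1024)  /* assumed */
    #define STACK_ALIGN         16           /* assumed C stack alignment */

    static uint64_t
    adjust_stack_top(uint64_t kevent_id_addr, uint64_t kevent_data_buf,
                    uint64_t kevent_data_available)
    {
            uint64_t top;
            if (kevent_data_available == WQ_KEVENT_DATA_SIZE) {
                    /* no data copied out: stack reaches the id slot */
                    top = kevent_id_addr;
            } else {
                    /* stack begins below the kevent data actually used */
                    top = kevent_data_buf + kevent_data_available;
            }
            return top & ~(uint64_t)(STACK_ALIGN - 1); /* align down */
    }

    int
    main(void)
    {
            printf("%#llx\n", (unsigned long long)
                            adjust_stack_top(0x7f0000, 0x7e8000, 4096));
            return 0;
    }
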
 
 /**
  * configures initial thread stack/registers to jump into:
@@ -3787,282 +840,90 @@ done:
  * |guard page | guardsize
  * |-----------| th_stackaddr
  */
+__attribute__((noreturn,noinline))
 void
-_setup_wqthread(proc_t p, thread_t th, struct workqueue *wq,
-               struct threadlist *tl, int setup_flags)
+workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
+               mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags)
 {
-       int error;
-       if (setup_flags & WQ_SETUP_CLEAR_VOUCHER) {
-               /*
-                * For preemption reasons, we want to reset the voucher as late as
-                * possible, so we do it in two places:
-                *   - Just before parking (i.e. in parkit())
-                *   - Prior to doing the setup for the next workitem (i.e. here)
-                *
-                * Those two places are sufficient to ensure we always reset it before
-                * it goes back out to user space, but be careful to not break that
-                * guarantee.
-                */
-               __assert_only kern_return_t kr;
-               kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
-               assert(kr == KERN_SUCCESS);
-       }
-
-       uint32_t upcall_flags = tl->th_upcall_flags << WQ_FLAG_THREAD_PRIOSHIFT;
-       if (!(setup_flags & WQ_SETUP_FIRST_USE)) {
-               upcall_flags |= WQ_FLAG_THREAD_REUSE;
-       }
-
-       /*
-        * Put the QoS class value into the lower bits of the reuse_thread register, this is where
-        * the thread priority used to be stored anyway.
-        */
-       pthread_priority_t priority = pthread_priority_from_wq_class_index(wq, tl->th_priority);
-       upcall_flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK);
-
-       const vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map);
-       const vm_size_t stack_gap_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_REDZONE_LEN;
-       const vm_size_t stack_align_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_STK_ALIGN;
-
-       user_addr_t pthread_self_addr = (user_addr_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET);
-       user_addr_t stack_top_addr = (user_addr_t)((pthread_self_addr - stack_gap_min) & -stack_align_min);
-       user_addr_t stack_bottom_addr = (user_addr_t)(tl->th_stackaddr + guardsize);
+       struct workq_thread_addrs th_addrs;
+       bool first_use = (setup_flags & WQ_SETUP_FIRST_USE);
+       user_addr_t kevent_list = NULL;
+       int kevent_count = 0;
 
-       user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
-       if (!wqstart_fnptr) {
-               panic("workqueue thread start function pointer is NULL");
-       }
+       workq_thread_get_addrs(map, stackaddr, &th_addrs);
 
-       if (setup_flags & WQ_SETUP_FIRST_USE) {
+       if (first_use) {
                uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
                if (tsd_offset) {
-                       mach_vm_offset_t th_tsd_base = (mach_vm_offset_t)pthread_self_addr + tsd_offset;
-                       kern_return_t kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
+                       mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset;
+                       kern_return_t kret = pthread_kern->thread_set_tsd_base(th,
+                                       th_tsd_base);
                        if (kret == KERN_SUCCESS) {
                                upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
                        }
                }
 
                /*
-               * Pre-fault the first page of the new thread's stack and the page that will
-               * contain the pthread_t structure.
-               */
-               vm_map_t vmap = pthread_kern->current_map();
-               if (vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
-                               vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap))){
-                       vm_fault( vmap,
-                                       vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
-                                       VM_PROT_READ | VM_PROT_WRITE,
-                                       FALSE,
-                                       THREAD_UNINT, NULL, 0);
+                * Pre-fault the first page of the new thread's stack and the page that will
+                * contain the pthread_t structure.
+                */
+               vm_map_offset_t mask = vm_map_page_mask(map);
+               vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask);
+               vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask);
+               if (th_page != stk_page) {
+                       vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
                }
-               vm_fault( vmap,
-                               vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap)),
-                               VM_PROT_READ | VM_PROT_WRITE,
-                               FALSE,
-                               THREAD_UNINT, NULL, 0);
+               vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
        }
 
-       user_addr_t kevent_list = NULL;
-       int kevent_count = 0;
-       if (upcall_flags & WQ_FLAG_THREAD_KEVENT){
-               bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP;
-
-               kevent_list = pthread_self_addr - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
-               kevent_count = WQ_KEVENT_LIST_LEN;
-
-               user_addr_t kevent_id_addr = kevent_list;
-               if (workloop) {
-                       /*
-                        * The kevent ID goes just below the kevent list.  Sufficiently new
-                        * userspace will know to look there.  Old userspace will just
-                        * ignore it.
-                        */
-                       kevent_id_addr -= sizeof(kqueue_id_t);
-               }
-
-               user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE;
-               user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE;
-
-               int32_t events_out = 0;
-
-               assert(tl->th_flags | TH_LIST_KEVENT_BOUND);
+       if (setup_flags & WQ_SETUP_EXIT_THREAD) {
+               kevent_count = WORKQ_EXIT_THREAD_NKEVENT;
+       } else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
                unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
-               if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-                       flags |= KEVENT_FLAG_WORKQ_MANAGER;
-               }
-               int ret = 0;
-               if (workloop) {
-                       flags |= KEVENT_FLAG_WORKLOOP;
-                       kqueue_id_t kevent_id = -1;
-                       ret = kevent_id_internal(p, &kevent_id,
-                                       NULL, 0, kevent_list, kevent_count,
-                                       kevent_data_buf, &kevent_data_available,
-                                       flags, &events_out);
-                       copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id));
-               } else {
-                       flags |= KEVENT_FLAG_WORKQ;
-                       ret = kevent_qos_internal(p,
-                                       class_index_get_thread_qos(tl->th_priority),
-                                       NULL, 0, kevent_list, kevent_count,
-                                       kevent_data_buf, &kevent_data_available,
-                                       flags, &events_out);
-               }
-
-               // squash any errors into just empty output
-               if (ret != KERN_SUCCESS || events_out == -1){
-                       events_out = 0;
-                       kevent_data_available = WQ_KEVENT_DATA_SIZE;
-               }
-
-               // We shouldn't get data out if there aren't events available
-               assert(events_out != 0 || kevent_data_available == WQ_KEVENT_DATA_SIZE);
-
-               if (events_out > 0){
-                       if (kevent_data_available == WQ_KEVENT_DATA_SIZE){
-                               stack_top_addr = (kevent_id_addr - stack_gap_min) & -stack_align_min;
-                       } else {
-                               stack_top_addr = (kevent_data_buf + kevent_data_available - stack_gap_min) & -stack_align_min;
-                       }
-
-                       kevent_count = events_out;
-               } else {
-                       kevent_list = NULL;
-                       kevent_count = 0;
-               }
+               workq_kevent(p, &th_addrs, upcall_flags, NULL, 0, flags,
+                               &kevent_list, &kevent_count);
        }
 
-       PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, 0, 0, 0, 0);
-
-#if defined(__i386__) || defined(__x86_64__)
-       if (proc_is64bit(p) == 0) {
-               x86_thread_state32_t state = {
-                       .eip = (unsigned int)wqstart_fnptr,
-                       .eax = /* arg0 */ (unsigned int)pthread_self_addr,
-                       .ebx = /* arg1 */ (unsigned int)tl->th_thport,
-                       .ecx = /* arg2 */ (unsigned int)stack_bottom_addr,
-                       .edx = /* arg3 */ (unsigned int)kevent_list,
-                       .edi = /* arg4 */ (unsigned int)upcall_flags,
-                       .esi = /* arg5 */ (unsigned int)kevent_count,
-
-                       .esp = (int)((vm_offset_t)stack_top_addr),
-               };
+       workq_set_register_state(p, th, &th_addrs, kport,
+                       kevent_list, upcall_flags, kevent_count);
 
-               error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
-               if (error != KERN_SUCCESS) {
-                       panic(__func__ ": thread_set_wq_state failed: %d", error);
-               }
+       if (first_use) {
+               pthread_kern->thread_bootstrap_return();
        } else {
-               x86_thread_state64_t state64 = {
-                       // x86-64 already passes all the arguments in registers, so we just put them in their final place here
-                       .rip = (uint64_t)wqstart_fnptr,
-                       .rdi = (uint64_t)pthread_self_addr,
-                       .rsi = (uint64_t)tl->th_thport,
-                       .rdx = (uint64_t)stack_bottom_addr,
-                       .rcx = (uint64_t)kevent_list,
-                       .r8  = (uint64_t)upcall_flags,
-                       .r9  = (uint64_t)kevent_count,
-
-                       .rsp = (uint64_t)(stack_top_addr)
-               };
-
-               error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
-               if (error != KERN_SUCCESS) {
-                       panic(__func__ ": thread_set_wq_state failed: %d", error);
-               }
+               pthread_kern->unix_syscall_return(EJUSTRETURN);
        }
-#else
-#error setup_wqthread  not defined for this architecture
-#endif
-}
-
-#if DEBUG
-static int wq_kevent_test SYSCTL_HANDLER_ARGS {
-       //(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
-#pragma unused(oidp, arg1, arg2)
-       int error;
-       struct workq_reqthreads_req_s requests[64] = {};
-
-       if (req->newlen > sizeof(requests) || req->newlen < sizeof(struct workq_reqthreads_req_s))
-               return EINVAL;
-
-       error = copyin(req->newptr, requests, req->newlen);
-       if (error) return error;
-
-       _workq_reqthreads(req->p, (int)(req->newlen / sizeof(struct workq_reqthreads_req_s)), requests);
-
-       return 0;
+       __builtin_unreachable();
 }
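
workq_thread_get_addrs (not shown in this hunk) derives the addresses named in the layout comment above. A self-contained sketch of the arithmetic under assumed sizes: one 4K guard page and a 512K stack stand in for the real map-derived page size and PTH_DEFAULT_STACKSIZE, and PTHREAD_T_OFFSET is omitted:

    #include <stdint.h>
    #include <stdio.h>

    #define GUARD_SIZE 0x1000ULL   /* assumed: one 4K guard page */
    #define STACK_SIZE 0x80000ULL  /* assumed stand-in for PTH_DEFAULT_STACKSIZE */

    struct thread_addrs_sketch {
            uint64_t stack_bottom; /* lowest usable stack byte, above the guard */
            uint64_t stack_top;    /* initial sp, aligned down before use */
            uint64_t self;         /* pthread_t sits above the stack */
    };

    static void
    compute_addrs(uint64_t stackaddr, struct thread_addrs_sketch *out)
    {
            out->stack_bottom = stackaddr + GUARD_SIZE;
            out->self = out->stack_bottom + STACK_SIZE;
            out->stack_top = out->self & ~0xfULL; /* 16-byte alignment */
    }

    int
    main(void)
    {
            struct thread_addrs_sketch a;
            compute_addrs(0x700000000000ULL, &a);
            printf("bottom %#llx  top %#llx  self %#llx\n",
                            (unsigned long long)a.stack_bottom,
                            (unsigned long long)a.stack_top,
                            (unsigned long long)a.self);
            return 0;
    }
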
-#endif // DEBUG
-
-#pragma mark - Misc
 
 int
-_fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
+workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
+               user_addr_t stackaddr, mach_port_name_t kport,
+               user_addr_t events, int nevents, int upcall_flags)
 {
-       struct workqueue * wq;
-       int error = 0;
-       int     activecount;
-
-       if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
-               return EINVAL;
-       }
-
-       /*
-        * This is sometimes called from interrupt context by the kperf sampler.
-        * In that case, it's not safe to spin trying to take the lock since we
-        * might already hold it.  So, we just try-lock it and error out if it's
-        * already held.  Since this is just a debugging aid, and all our callers
-        * are able to handle an error, that's fine.
-        */
-       bool locked = workqueue_lock_try(wq);
-       if (!locked) {
-               return EBUSY;
-       }
-
-       activecount = _wq_thactive_aggregate_downto_qos(wq, _wq_thactive(wq),
-                       WORKQUEUE_NUM_BUCKETS - 1, NULL, NULL);
-       pwqinfo->pwq_nthreads = wq->wq_nthreads;
-       pwqinfo->pwq_runthreads = activecount;
-       pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
-       pwqinfo->pwq_state = 0;
-
-       if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
-               pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
-       }
-
-       if (wq->wq_nthreads >= wq_max_threads) {
-               pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
-       }
-
-       workqueue_unlock(wq);
-       return(error);
-}
+       struct workq_thread_addrs th_addrs;
+       user_addr_t kevent_list = NULL;
+       int kevent_count = 0, error;
+       __assert_only kern_return_t kr;
 
-uint32_t
-_get_pwq_state_kdp(proc_t p)
-{
-       if (p == NULL) {
-               return 0;
-       }
+       workq_thread_get_addrs(map, stackaddr, &th_addrs);
 
-       struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
+       unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE |
+                       KEVENT_FLAG_PARKING;
+       error = workq_kevent(p, &th_addrs, upcall_flags, events, nevents, flags,
+                       &kevent_list, &kevent_count);
 
-       if (wq == NULL || workqueue_lock_spin_is_acquired_kdp(wq)) {
-               return 0;
+       if (error || kevent_count == 0) {
+               return error;
        }
 
-       uint32_t pwq_state = WQ_FLAGS_AVAILABLE;
-
-       if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
-               pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
-       }
+       kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
+       assert(kr == KERN_SUCCESS);
 
-       if (wq->wq_nthreads >= wq_max_threads) {
-               pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
-       }
+       workq_set_register_state(p, th, &th_addrs, kport,
+                       kevent_list, upcall_flags, kevent_count);
 
-       return pwq_state;
+       pthread_kern->unix_syscall_return(EJUSTRETURN);
+       __builtin_unreachable();
 }
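
Both this function and workq_setup_thread delegate to workq_kevent, which copies events out onto the top of the thread's own stack. The removed _setup_wqthread shows the placement: the event list just below the pthread_t, the workloop kqueue-id slot just below the list, and the out-of-line data buffer below that. A sketch with assumed sizes (WQ_KEVENT_LIST_LEN, the kevent_qos_s size, and the starting address are all illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define WQ_KEVENT_LIST_LEN  16             /* assumed */
    #define KEVENT_QOS_S_SIZE   64             /* assumed sizeof(struct kevent_qos_s) */
    #define WQ_KEVENT_DATA_SIZE (32 * 1024)    /* assumed */

    int
    main(void)
    {
            uint64_t self = 0x70000080a000ULL; /* hypothetical pthread_t address */
            uint64_t list = self - WQ_KEVENT_LIST_LEN * KEVENT_QOS_S_SIZE;
            uint64_t id_slot = list - sizeof(uint64_t); /* kqueue_id_t, workloops only */
            uint64_t data = id_slot - WQ_KEVENT_DATA_SIZE;

            printf("kevent list %#llx\nkevent id   %#llx\ndata buffer %#llx\n",
                            (unsigned long long)list,
                            (unsigned long long)id_slot,
                            (unsigned long long)data);
            return 0;
    }
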
 
 int
@@ -4083,44 +944,16 @@ _pthread_init(void)
         * allocate the lock attribute for pthread synchronizers
         */
        pthread_lck_attr = lck_attr_alloc_init();
-
        pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
 
        pth_global_hashinit();
        psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
        psynch_zoneinit();
 
-       pthread_zone_workqueue = zinit(sizeof(struct workqueue),
-                       1024 * sizeof(struct workqueue), 8192, "pthread.workqueue");
-       pthread_zone_threadlist = zinit(sizeof(struct threadlist),
-                       1024 * sizeof(struct threadlist), 8192, "pthread.threadlist");
-       pthread_zone_threadreq = zinit(sizeof(struct threadreq),
-                       1024 * sizeof(struct threadreq), 8192, "pthread.threadreq");
-
        int policy_bootarg;
        if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
                pthread_mutex_default_policy = policy_bootarg;
        }
 
-       /*
-        * register sysctls
-        */
-       sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs);
-       sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs);
-       sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs);
-       sysctl_register_oid(&sysctl__kern_wq_max_threads);
-       sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads);
-       sysctl_register_oid(&sysctl__kern_pthread_debug_tracing);
        sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);
-
-#if DEBUG
-       sysctl_register_oid(&sysctl__debug_wq_kevent_test);
-#endif
-
-       for (int i = 0; i < WORKQUEUE_NUM_BUCKETS; i++) {
-               uint32_t thread_qos = _wq_bucket_to_thread_qos(i);
-               wq_max_concurrency[i] = pthread_kern->qos_max_parallelism(thread_qos,
-                               QOS_PARALLELISM_COUNT_LOGICAL);
-       }
-       wq_max_concurrency[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
 }
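
With the workqueue zones and sysctls gone, the only tunable left here is the mutex policy boot-arg, read with PE_parse_boot_argn so the compiled-in default survives unless the argument is present. A user-space analogue of the same read-with-default pattern, using a hypothetical environment variable in place of the boot-arg:

    #include <stdio.h>
    #include <stdlib.h>

    static int pthread_mutex_default_policy; /* 0 until overridden */

    static void
    parse_policy_knob(void)
    {
            /* hypothetical variable name, standing in for the boot-arg */
            const char *s = getenv("PTHREAD_MUTEX_DEFAULT_POLICY");
            if (s != NULL) {
                    pthread_mutex_default_policy = atoi(s);
            }
    }

    int
    main(void)
    {
            parse_policy_knob();
            printf("pthread_mutex_default_policy = %d\n",
                            pthread_mutex_default_policy);
            return 0;
    }
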
index 217ddcb7bd00324cd56aa0985208a2e46a524b17..7dabe413562e5a8bb30ef8a14fece8f94631359f 100644 (file)
@@ -69,6 +69,7 @@
 #include <kern/sched_prim.h>
 #include <kern/processor.h>
 #include <kern/block_hint.h>
+#include <kern/turnstile.h>
 //#include <kern/mach_param.h>
 #include <mach/mach_vm.h>
 #include <mach/mach_param.h>
@@ -82,7 +83,6 @@
 #include <libkern/OSAtomic.h>
 
 #include <pexpert/pexpert.h>
-#include <sys/pthread_shims.h>
 
 #include "kern_internal.h"
 #include "synch_internal.h"
@@ -92,9 +92,7 @@ typedef struct uthread *uthread_t;
 
 //#define __FAILEDUSERTEST__(s) do { panic(s); } while (0)
 #define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0)
-
-#define ECVCERORR      256
-#define ECVPERORR      512
+#define __FAILEDUSERTEST2__(s, x...) do { printf("PSYNCH: pid[%d]: " s "\n", proc_pid(current_proc()), x); } while (0)
 
 lck_mtx_t *pthread_list_mlock;
 
@@ -119,17 +117,23 @@ struct ksyn_queue {
 };
 typedef struct ksyn_queue *ksyn_queue_t;
 
-enum {
+typedef enum {
        KSYN_QUEUE_READ = 0,
-       KSYN_QUEUE_WRITER,
+       KSYN_QUEUE_WRITE,
        KSYN_QUEUE_MAX,
-};
+} kwq_queue_type_t;
+
+typedef enum {
+       KWQ_INTR_NONE = 0,
+       KWQ_INTR_READ = 0x1,
+       KWQ_INTR_WRITE = 0x2,
+} kwq_intr_type_t;
 
 struct ksyn_wait_queue {
        LIST_ENTRY(ksyn_wait_queue) kw_hash;
        LIST_ENTRY(ksyn_wait_queue) kw_list;
        user_addr_t kw_addr;
-       uint64_t kw_owner;
+       thread_t kw_owner;              /* current owner or THREAD_NULL, has a +1 */
        uint64_t kw_object;             /* object backing in shared mode */
        uint64_t kw_offset;             /* offset inside the object in shared mode */
        int     kw_pflags;              /* flags under listlock protection */
@@ -151,19 +155,23 @@ struct ksyn_wait_queue {
        uint32_t kw_lastseqword;                /* the last seq that unlocked */
        /* for mutex and cvar we need to track I bit values */
        uint32_t kw_nextseqword;        /* the last seq that unlocked; with num of waiters */
-       uint32_t kw_overlapwatch;       /* chance for overlaps */
-       uint32_t kw_pre_rwwc;           /* prepost count */
-       uint32_t kw_pre_lockseq;        /* prepost target seq */
-       uint32_t kw_pre_sseq;           /* prepost target sword, in cvar used for mutexowned */
-       uint32_t kw_pre_intrcount;      /* prepost of missed wakeup due to intrs */
-       uint32_t kw_pre_intrseq;        /* prepost of missed wakeup limit seq */
-       uint32_t kw_pre_intrretbits;    /* return bits value for missed wakeup threads */
-       uint32_t kw_pre_intrtype;       /* type of failed wakueps*/
+       struct {
+               uint32_t count; /* prepost count */
+               uint32_t lseq; /* prepost target seq */
+               uint32_t sseq; /* prepost target sword, in cvar used for mutexowned */
+       } kw_prepost;
+       struct {
+               kwq_intr_type_t type; /* type of failed wakeups */

+               uint32_t count; /* prepost of missed wakeup due to intrs */
+               uint32_t seq; /* prepost of missed wakeup limit seq */
+               uint32_t returnbits; /* return bits value for missed wakeup threads */
+       } kw_intr;
        
        int     kw_kflags;
        int             kw_qos_override;        /* QoS of max waiter during contention period */
+       struct turnstile *kw_turnstile;
        struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX];        /* queues to hold threads */
-       lck_mtx_t kw_lock;              /* mutex lock protecting this structure */
+       lck_spin_t kw_lock;             /* spinlock protecting this structure */
 };
 typedef struct ksyn_wait_queue * ksyn_wait_queue_t;
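
The flat kw_pre_* and kw_pre_intr* scalars of the old layout become two named sub-structs above, so call sites read kwq->kw_prepost.count rather than kw_pre_rwwc. A stripped-down sketch of the grouping (field names kept, everything else simplified; this is not the full wait-queue structure):

    #include <stdint.h>

    typedef enum { INTR_NONE = 0, INTR_READ = 1, INTR_WRITE = 2 } intr_type_t;

    struct kwq_sketch {
            struct {
                    uint32_t count; /* outstanding preposted wakeups */
                    uint32_t lseq;  /* prepost target lock sequence */
                    uint32_t sseq;  /* prepost target s-word */
            } kw_prepost;
            struct {
                    intr_type_t type;    /* which waiter kind missed a wakeup */
                    uint32_t count;      /* missed wakeups due to interrupts */
                    uint32_t seq;        /* limit sequence for the miss */
                    uint32_t returnbits; /* bits handed to the retrying thread */
            } kw_intr;
    };

    int
    main(void)
    {
            struct kwq_sketch kwq = { .kw_intr.type = INTR_NONE };
            (void)kwq;
            return 0;
    }
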
 
@@ -189,14 +197,9 @@ typedef struct ksyn_wait_queue * ksyn_wait_queue_t;
 /*
  * Mutex policy attributes
  */
-#define _PTHREAD_MUTEX_POLICY_NONE             0
-#define _PTHREAD_MUTEX_POLICY_FAIRSHARE                0x040   /* 1 */
-#define _PTHREAD_MUTEX_POLICY_FIRSTFIT         0x080   /* 2 */
-#define _PTHREAD_MUTEX_POLICY_REALTIME         0x0c0   /* 3 */
-#define _PTHREAD_MUTEX_POLICY_ADAPTIVE         0x100   /* 4 */
-#define _PTHREAD_MUTEX_POLICY_PRIPROTECT       0x140   /* 5 */
-#define _PTHREAD_MUTEX_POLICY_PRIINHERIT       0x180   /* 6 */
-#define PTHREAD_POLICY_FLAGS_MASK              0x1c0
+#define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE      0x040   /* 1 */
+#define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT       0x080   /* 2 */
+#define _PTHREAD_MTX_OPT_POLICY_MASK           0x1c0
 
 /* pflags */
 #define KSYN_WQ_INHASH 2
@@ -205,9 +208,10 @@ typedef struct ksyn_wait_queue * ksyn_wait_queue_t;
 #define KSYN_WQ_FLIST  0X10    /* in free list to be freed after a short delay */
 
 /* kflags */
-#define KSYN_KWF_INITCLEARED   1       /* the init status found and preposts cleared */
-#define KSYN_KWF_ZEROEDOUT     2       /* the lword, etc are inited to 0 */
-#define KSYN_KWF_QOS_APPLIED   4       /* QoS override applied to owner */
+#define KSYN_KWF_INITCLEARED   0x1     /* the init status found and preposts cleared */
+#define KSYN_KWF_ZEROEDOUT     0x2     /* the lword, etc are inited to 0 */
+#define KSYN_KWF_QOS_APPLIED   0x4     /* QoS override applied to owner */
+#define KSYN_KWF_OVERLAP_GUARD 0x8     /* overlap guard */
 
 #define KSYN_CLEANUP_DEADLINE 10
 static int psynch_cleanupset;
@@ -223,47 +227,24 @@ thread_call_t psynch_thcall;
 
 #define KSYN_WQTYPE_MUTEXDROP  (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)
 
-#define KW_UNLOCK_PREPOST              0x01
-#define KW_UNLOCK_PREPOST_READLOCK     0x08
-#define KW_UNLOCK_PREPOST_WRLOCK       0x20
-
-static void
-CLEAR_PREPOST_BITS(ksyn_wait_queue_t kwq)
+static inline int
+_kwq_type(ksyn_wait_queue_t kwq)
 {
-       kwq->kw_pre_lockseq = 0;
-       kwq->kw_pre_sseq = PTHRW_RWS_INIT;
-       kwq->kw_pre_rwwc = 0;
+       return (kwq->kw_type & KSYN_WQTYPE_MASK);
 }
 
-static void
-CLEAR_INTR_PREPOST_BITS(ksyn_wait_queue_t kwq)
+static inline bool
+_kwq_use_turnstile(ksyn_wait_queue_t kwq)
 {
-       kwq->kw_pre_intrcount = 0;
-       kwq->kw_pre_intrseq = 0;
-       kwq->kw_pre_intrretbits = 0;
-       kwq->kw_pre_intrtype = 0;
+       // <rdar://problem/15926625> If we had writer-owner information from the
+       // rwlock then we could use the turnstile to push on it. For now, only
+       // plain mutexes use it.
+       return (_kwq_type(kwq) == KSYN_WQTYPE_MTX);
 }
 
-static void
-CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
-{
-       if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
-               if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
-                       panic("CV:entries in queue durinmg reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount);
-               }
-       };
-       if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
-               kwq->kw_nextseqword = PTHRW_RWS_INIT;
-               kwq->kw_overlapwatch = 0;
-       };
-       CLEAR_PREPOST_BITS(kwq);
-       kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
-       kwq->kw_lastseqword = PTHRW_RWS_INIT;
-       CLEAR_INTR_PREPOST_BITS(kwq);
-       kwq->kw_lword = 0;
-       kwq->kw_uword = 0;
-       kwq->kw_sword = PTHRW_RWS_INIT;
-}
+#define KW_UNLOCK_PREPOST              0x01
+#define KW_UNLOCK_PREPOST_READLOCK     0x08
+#define KW_UNLOCK_PREPOST_WRLOCK       0x20
 
 static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t *object, uint64_t *offset);
 static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype , ksyn_wait_queue_t *wq);
@@ -272,13 +253,11 @@ static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
 
 static int _wait_result_to_errno(wait_result_t result);
 
-static int ksyn_wait(ksyn_wait_queue_t, int, uint32_t, int, uint64_t, thread_continue_t, block_hint_t);
-static kern_return_t ksyn_signal(ksyn_wait_queue_t, int, ksyn_waitq_element_t, uint32_t);
+static int ksyn_wait(ksyn_wait_queue_t, kwq_queue_type_t, uint32_t, int, uint64_t, uint16_t, thread_continue_t, block_hint_t);
+static kern_return_t ksyn_signal(ksyn_wait_queue_t, kwq_queue_type_t, ksyn_waitq_element_t, uint32_t);
 static void ksyn_freeallkwe(ksyn_queue_t kq);
 
-static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t);
-static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t, uint64_t tid, boolean_t prepost);
-static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t);
+static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t, thread_t *);
 
 static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen);
 
@@ -299,8 +278,10 @@ static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t
 static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep);
 static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);
 
-static void psynch_cvcontinue(void *, wait_result_t);
-static void psynch_mtxcontinue(void *, wait_result_t);
+static void __dead2 psynch_cvcontinue(void *, wait_result_t);
+static void __dead2 psynch_mtxcontinue(void *, wait_result_t);
+static void __dead2 psynch_rw_rdcontinue(void *, wait_result_t);
+static void __dead2 psynch_rw_wrcontinue(void *, wait_result_t);
 
 static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp);
 static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]);
@@ -335,6 +316,196 @@ UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc
        }
 }
 
+static inline void
+_kwq_clear_preposted_wakeup(ksyn_wait_queue_t kwq)
+{
+       kwq->kw_prepost.lseq = 0;
+       kwq->kw_prepost.sseq = PTHRW_RWS_INIT;
+       kwq->kw_prepost.count = 0;
+}
+
+static inline void
+_kwq_mark_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t count,
+               uint32_t lseq, uint32_t sseq)
+{
+       kwq->kw_prepost.count = count;
+       kwq->kw_prepost.lseq = lseq;
+       kwq->kw_prepost.sseq = sseq;
+}
+
+static inline void
+_kwq_clear_interrupted_wakeup(ksyn_wait_queue_t kwq)
+{
+       kwq->kw_intr.type = KWQ_INTR_NONE;
+       kwq->kw_intr.count = 0;
+       kwq->kw_intr.seq = 0;
+       kwq->kw_intr.returnbits = 0;
+}
+
+static inline void
+_kwq_mark_interruped_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type,
+               uint32_t count, uint32_t lseq, uint32_t returnbits)
+{
+       kwq->kw_intr.count = count;
+       kwq->kw_intr.seq = lseq;
+       kwq->kw_intr.returnbits = returnbits;
+       kwq->kw_intr.type = type;
+}
+
+static void
+_kwq_destroy(ksyn_wait_queue_t kwq)
+{
+       if (kwq->kw_owner) {
+               thread_deallocate(kwq->kw_owner);
+       }
+       lck_spin_destroy(&kwq->kw_lock, pthread_lck_grp);
+       zfree(kwq_zone, kwq);
+}
+
+#define KWQ_SET_OWNER_TRANSFER_REF  0x1
+
+static inline thread_t
+_kwq_set_owner(ksyn_wait_queue_t kwq, thread_t new_owner, int flags)
+{
+       thread_t old_owner = kwq->kw_owner;
+       if (old_owner == new_owner) {
+               if (flags & KWQ_SET_OWNER_TRANSFER_REF) return new_owner;
+               return THREAD_NULL;
+       }
+       if ((flags & KWQ_SET_OWNER_TRANSFER_REF) == 0) {
+               thread_reference(new_owner);
+       }
+       kwq->kw_owner = new_owner;
+       return old_owner;
+}
+
+static inline thread_t
+_kwq_clear_owner(ksyn_wait_queue_t kwq)
+{
+       return _kwq_set_owner(kwq, THREAD_NULL, KWQ_SET_OWNER_TRANSFER_REF);
+}
+
+static inline void
+_kwq_cleanup_old_owner(thread_t *thread)
+{
+       if (*thread) {
+               thread_deallocate(*thread);
+               *thread = THREAD_NULL;
+       }
+}
+
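kw_owner now holds a thread_t carrying a +1 reference rather than a bare tid, so every owner change must balance thread_reference/thread_deallocate, and the deallocate must run only after the spinlock is dropped. A toy refcount sketch of that handoff convention; the real _kwq_set_owner additionally handles the KWQ_SET_OWNER_TRANSFER_REF flag and the owner-unchanged case:

    #include <stddef.h>
    #include <stdlib.h>

    struct toy_thread { int refs; };

    static void retain(struct toy_thread *t)  { if (t) t->refs++; }
    static void release(struct toy_thread *t) { if (t && --t->refs == 0) free(t); }

    struct toy_kwq { struct toy_thread *owner; };

    /* returns the previous owner; caller releases it outside the lock */
    static struct toy_thread *
    set_owner(struct toy_kwq *kwq, struct toy_thread *new_owner)
    {
            struct toy_thread *old = kwq->owner;
            retain(new_owner);
            kwq->owner = new_owner;
            return old;
    }

    int
    main(void)
    {
            struct toy_kwq kwq = { NULL };
            struct toy_thread *a = calloc(1, sizeof(*a));
            a->refs = 1;

            struct toy_thread *old = set_owner(&kwq, a);
            release(old);   /* NULL-safe, like _kwq_cleanup_old_owner */
            old = set_owner(&kwq, NULL);
            release(old);   /* drops the queue's ref on a */
            release(a);     /* drop our own ref */
            return 0;
    }
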
+static void
+CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
+{
+       if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
+               if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
+                       panic("CV: entries in queue during reinit %d:%d\n", kwq->kw_inqueue, kwq->kw_fakecount);
+               }
+       }
+       if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
+               kwq->kw_nextseqword = PTHRW_RWS_INIT;
+               kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD;
+       }
+       _kwq_clear_preposted_wakeup(kwq);
+       kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
+       kwq->kw_lastseqword = PTHRW_RWS_INIT;
+       _kwq_clear_interrupted_wakeup(kwq);
+       kwq->kw_lword = 0;
+       kwq->kw_uword = 0;
+       kwq->kw_sword = PTHRW_RWS_INIT;
+}
+
+static bool
+_kwq_handle_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t type,
+               uint32_t lseq, uint32_t *retval)
+{
+       if (kwq->kw_prepost.count == 0 ||
+                       !is_seqlower_eq(lseq, kwq->kw_prepost.lseq)) {
+               return false;
+       }
+
+       kwq->kw_prepost.count--;
+       if (kwq->kw_prepost.count > 0) {
+               return false;
+       }
+
+       int error, should_block = 0;
+       uint32_t updatebits = 0;
+       uint32_t pp_lseq = kwq->kw_prepost.lseq;
+       uint32_t pp_sseq = kwq->kw_prepost.sseq;
+       _kwq_clear_preposted_wakeup(kwq);
+
+       kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
+
+       error = kwq_handle_unlock(kwq, pp_lseq, pp_sseq, &updatebits,
+                       (type | KW_UNLOCK_PREPOST), &should_block, lseq);
+       if (error) {
+               panic("_kwq_handle_preposted_wakeup: kwq_handle_unlock failed %d",
+                               error);
+       }
+
+       if (should_block) {
+               return false;
+       }
+       *retval = updatebits;
+       return true;
+}
+
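The prepost check above leans on is_seqlower_eq, one of the wraparound-tolerant sequence comparisons used throughout this file. A sketch of one common way to implement such a comparison; the real helpers operate on the masked count bits, so treat this as illustrative of the idea, not the exact code:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool
    seq_lower_eq(uint32_t x, uint32_t y)
    {
            /* signed-distance trick: valid while the two sequences
             * stay within half the 32-bit space of each other */
            return (int32_t)(x - y) <= 0;
    }

    int
    main(void)
    {
            printf("%d\n", seq_lower_eq(5, 10));              /* 1 */
            printf("%d\n", seq_lower_eq(0xfffffff0u, 0x10u)); /* 1: wrapped */
            printf("%d\n", seq_lower_eq(0x10u, 0xfffffff0u)); /* 0 */
            return 0;
    }
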
+static bool
+_kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t type, uint32_t lgenval, 
+               uint32_t rw_wc, uint32_t *retval)
+{
+       bool res = false;
+
+       // overlaps only occur on read lockers
+       if (type != PTH_RW_TYPE_READ) {
+               return false;
+       }
+
+       // check for overlap and no pending W bit (indicates writers)
+       if ((kwq->kw_kflags & KSYN_KWF_OVERLAP_GUARD) &&
+                       !is_rws_savemask_set(rw_wc) && !is_rwl_wbit_set(lgenval)) {
+               /* overlap is set, so no need to check for valid state for overlap */
+
+               if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
+                       /* increase the next expected seq by one */
+                       kwq->kw_nextseqword += PTHRW_INC;
+                       /* set count by one & bits from the nextseq and add M bit */
+                       *retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
+                       res = true;
+               }
+       }
+       return res;
+}
+
+static inline bool
+_kwq_is_used(ksyn_wait_queue_t kwq)
+{
+       return (kwq->kw_inqueue != 0 || kwq->kw_prepost.count != 0 ||
+                       kwq->kw_intr.count != 0);
+}
+
+/*
+ * Consumes a pending interrupted waiter. Returns true if the current
+ * thread should return to userspace because it was previously
+ * interrupted.
+ */
+static inline bool
+_kwq_handle_interrupted_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type,
+               uint32_t lseq, uint32_t *retval)
+{
+       if (kwq->kw_intr.count != 0 && kwq->kw_intr.type == type &&
+                       (!kwq->kw_intr.seq || is_seqlower_eq(lseq, kwq->kw_intr.seq))) {
+               kwq->kw_intr.count--;
+               *retval = kwq->kw_intr.returnbits;
+               if (kwq->kw_intr.returnbits == 0) {
+                       _kwq_clear_interrupted_wakeup(kwq);
+               }
+               return true;
+       }
+       return false;
+}
+
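_kwq_handle_interrupted_wakeup consumes bookkeeping left behind when a wakeup was issued but the target thread had already been interrupted out of its wait. A toy record/consume model of the kw_intr fields, with the sequence checks dropped for brevity:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct intr_state {
            int      type;       /* 0 = none */
            uint32_t count;
            uint32_t returnbits;
    };

    static void
    record_missed_wakeup(struct intr_state *s, int type, uint32_t bits)
    {
            s->type = type;
            s->count++;
            s->returnbits = bits;
    }

    static bool
    consume_missed_wakeup(struct intr_state *s, int type, uint32_t *bits_out)
    {
            if (s->count == 0 || s->type != type)
                    return false;
            s->count--;
            *bits_out = s->returnbits;
            return true;
    }

    int
    main(void)
    {
            struct intr_state s = {0};
            uint32_t bits;
            record_missed_wakeup(&s, 2 /* write */, 0xabc);
            if (consume_missed_wakeup(&s, 2, &bits))
                    printf("consumed, bits=%#x\n", bits);
            return 0;
    }
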
 static void
 pthread_list_lock(void)
 {
@@ -350,98 +521,117 @@ pthread_list_unlock(void)
 static void
 ksyn_wqlock(ksyn_wait_queue_t kwq)
 {
-       
-       lck_mtx_lock(&kwq->kw_lock);
+       lck_spin_lock(&kwq->kw_lock);
 }
 
 static void
 ksyn_wqunlock(ksyn_wait_queue_t kwq)
 {
-       lck_mtx_unlock(&kwq->kw_lock);
+       lck_spin_unlock(&kwq->kw_lock);
 }
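
Because kw_lock is now a spinlock, nothing that can block (thread_deallocate, turnstile cleanup) may run while it is held; callers detach state under the lock and clean it up afterwards, which is exactly why _psynch_mutexdrop_internal below releases the old owner only after ksyn_wqunlock. A minimal sketch of that detach-then-release pattern with a C11 atomic-flag spinlock:

    #include <stdatomic.h>
    #include <stdlib.h>

    static atomic_flag kw_lock = ATOMIC_FLAG_INIT;
    static void *kw_owner; /* protected by kw_lock */

    static void
    wq_lock(void)
    {
            while (atomic_flag_test_and_set_explicit(&kw_lock,
                            memory_order_acquire)) {
                    /* spin */
            }
    }

    static void
    wq_unlock(void)
    {
            atomic_flag_clear_explicit(&kw_lock, memory_order_release);
    }

    int
    main(void)
    {
            kw_owner = malloc(16);

            void *old;
            wq_lock();
            old = kw_owner;  /* detach under the lock; no blocking calls here */
            kw_owner = NULL;
            wq_unlock();
            free(old);       /* potentially-blocking cleanup after the unlock */
            return 0;
    }
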
 
-
 /* routine to drop the mutex unlock, used both for the mutexunlock system call and the drop during cond wait */
 static uint32_t
-_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, int flags)
+_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen,
+               int flags)
 {
        kern_return_t ret;
        uint32_t returnbits = 0;
-       int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
+       uint32_t updatebits = 0;
+       int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) ==
+                       _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
        uint32_t nextgen = (ugen + PTHRW_INC);
+       thread_t old_owner = THREAD_NULL;
 
        ksyn_wqlock(kwq);
        kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
-       uint32_t updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT);
 
 redrive:
+       updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) |
+                       (PTH_RWL_EBIT | PTH_RWL_KBIT);
+
        if (firstfit) {
                if (kwq->kw_inqueue == 0) {
-                       // not set or the new lock sequence is higher
-                       if (kwq->kw_pre_rwwc == 0 || is_seqhigher(mgen, kwq->kw_pre_lockseq)) {
-                               kwq->kw_pre_lockseq = (mgen & PTHRW_COUNT_MASK);
-                       }
-                       kwq->kw_pre_rwwc = 1;
-                       ksyn_mtx_drop_qos_override(kwq);
-                       kwq->kw_owner = 0;
-                       // indicate prepost content in kernel
-                       returnbits = mgen | PTH_RWL_PBIT;
+                       uint32_t count = kwq->kw_prepost.count + 1;
+                       // Increment the number of preposters we have waiting
+                       _kwq_mark_preposted_wakeup(kwq, count, mgen & PTHRW_COUNT_MASK, 0);
+                       // We don't know the current owner as we've determined this mutex
+                       // drop should have a preposted locker inbound into the kernel but
+                       // we have no way of knowing who it is. When it arrives, the lock
+                       // path will update the turnstile owner and return it to userspace.
+                       old_owner = _kwq_clear_owner(kwq);
+                       pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL,
+                                       &kwq->kw_turnstile);
+                       PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
+                                       kwq->kw_prepost.lseq, count, 0);
                } else {
                        // signal first waiter
-                       ret = ksyn_mtxsignal(kwq, NULL, updatebits);
+                       ret = ksyn_mtxsignal(kwq, NULL, updatebits, &old_owner);
                        if (ret == KERN_NOT_WAITING) {
+                               // <rdar://problem/39093536> ksyn_mtxsignal attempts to signal
+                               // the thread but it sets up the turnstile inheritor first.
+                               // That means we can't redrive the mutex in a loop without
+                               // dropping the wq lock and cleaning up the turnstile state.
+                               ksyn_wqunlock(kwq);
+                               pthread_kern->psynch_wait_cleanup();
+                               _kwq_cleanup_old_owner(&old_owner);
+                               ksyn_wqlock(kwq);
                                goto redrive;
                        }
                }
        } else {        
-               int prepost = 0;
+               bool prepost = false;
                if (kwq->kw_inqueue == 0) {
                        // No waiters in the queue.
-                       prepost = 1;
+                       prepost = true;
                } else {
-                       uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum & PTHRW_COUNT_MASK);
+                       uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum & PTHRW_COUNT_MASK);
                        if (low_writer == nextgen) {
                                /* next seq to be granted found */
                                /* since the grant could be cv, make sure mutex wait is set in case the thread was interrupted out */
-                               ret = ksyn_mtxsignal(kwq, NULL, updatebits | PTH_RWL_MTX_WAIT);
+                               ret = ksyn_mtxsignal(kwq, NULL,
+                                               updatebits | PTH_RWL_MTX_WAIT, &old_owner);
                                if (ret == KERN_NOT_WAITING) {
                                        /* interrupt post */
-                                       kwq->kw_pre_intrcount = 1;
-                                       kwq->kw_pre_intrseq = nextgen;
-                                       kwq->kw_pre_intrretbits = updatebits;
-                                       kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
+                                       _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1,
+                                                       nextgen, updatebits);
                                }
-                               
                        } else if (is_seqhigher(low_writer, nextgen)) {
-                               prepost = 1;
+                               prepost = true;
                        } else {
                                //__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one is queue\n");
                                ksyn_waitq_element_t kwe;
-                               kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen);
+                               kwe = ksyn_queue_find_seq(kwq,
+                                               &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], nextgen);
                                if (kwe != NULL) {
                                        /* next seq to be granted found */
                                        /* since the grant could be cv, make sure mutex wait is set in case the thread was interrupted out */
-                                       ret = ksyn_mtxsignal(kwq, kwe, updatebits | PTH_RWL_MTX_WAIT);
+                                       ret = ksyn_mtxsignal(kwq, kwe,
+                                                       updatebits | PTH_RWL_MTX_WAIT, &old_owner);
                                        if (ret == KERN_NOT_WAITING) {
                                                goto redrive;
                                        }
                                } else {
-                                       prepost = 1;
+                                       prepost = true;
                                }
                        }
                }
                if (prepost) {
-                       ksyn_mtx_drop_qos_override(kwq);
-                       kwq->kw_owner = 0;
-                       if (++kwq->kw_pre_rwwc > 1) {
+                       if (kwq->kw_prepost.count != 0) {
                                __FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n");
                        } else {
-                               kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK);
+                               _kwq_mark_preposted_wakeup(kwq, 1, nextgen & PTHRW_COUNT_MASK,
+                                               0);
                        }
+                       old_owner = _kwq_clear_owner(kwq);
+                       pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL,
+                                       &kwq->kw_turnstile);
                }
        }
-       
+
        ksyn_wqunlock(kwq);
+       pthread_kern->psynch_wait_cleanup();
+       _kwq_cleanup_old_owner(&old_owner);
        ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP);
        return returnbits;
 }
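
To summarize the fairshare branch above: an unlock that finds no waiter records a prepost keyed to the next sequence, and the matching waiter later consumes it without blocking. A toy model of that handoff, with sequence matching reduced to equality (the real code compares masked, wrapping sequences and handles firstfit differently):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct toy_kwq {
            uint32_t prepost_count;
            uint32_t prepost_seq;
    };

    static void
    unlock_no_waiters(struct toy_kwq *kwq, uint32_t nextgen)
    {
            kwq->prepost_count = 1;
            kwq->prepost_seq = nextgen;
    }

    static bool
    wait_consume_prepost(struct toy_kwq *kwq, uint32_t lseq)
    {
            if (kwq->prepost_count != 0 && lseq == kwq->prepost_seq) {
                    kwq->prepost_count--;
                    return true; /* take the lock without blocking */
            }
            return false;        /* enqueue and block */
    }

    int
    main(void)
    {
            struct toy_kwq kwq = {0};
            unlock_no_waiters(&kwq, 0x100);
            printf("%s\n", wait_consume_prepost(&kwq, 0x100)
                            ? "granted" : "blocked");
            return 0;
    }
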
@@ -460,354 +650,216 @@ _ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval)
        return res;
 }
 
-static int
-_ksyn_handle_missed_wakeups(ksyn_wait_queue_t kwq,
-                           uint32_t type,
-                           uint32_t lockseq,
-                           uint32_t *retval)
-{
-       int res = 0;
-       if (kwq->kw_pre_intrcount != 0 &&
-               kwq->kw_pre_intrtype == type &&
-               (kwq->kw_pre_intrseq == 0 || is_seqlower_eq(lockseq, kwq->kw_pre_intrseq))) {
-               kwq->kw_pre_intrcount--;
-               *retval = kwq->kw_pre_intrretbits;
-               if (kwq->kw_pre_intrcount == 0) {
-                       CLEAR_INTR_PREPOST_BITS(kwq);
-               }
-               res = 1;
-       }
-       return res;
-}
-
-static int
-_ksyn_handle_overlap(ksyn_wait_queue_t kwq,
-                    uint32_t lgenval,
-                    uint32_t rw_wc,
-                    uint32_t *retval)
+/*
+ * psynch_mutexwait: This system call is used to block on contended psynch
+ * mutexes.
+ */
+int
+_psynch_mutexwait(__unused proc_t p, user_addr_t mutex, uint32_t mgen,
+               uint32_t ugen, uint64_t tid, uint32_t flags, uint32_t *retval)
 {
-       int res = 0;
-
-       // check for overlap and no pending W bit (indicates writers)
-       if (kwq->kw_overlapwatch != 0 &&
-           (rw_wc & PTHRW_RWS_SAVEMASK) == 0 &&
-           (lgenval & PTH_RWL_WBIT) == 0) {
-               /* overlap is set, so no need to check for valid state for overlap */
+       ksyn_wait_queue_t kwq;
+       int error = 0;
+       int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK)
+                       == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
+       int ins_flags = SEQFIT;
+       uint32_t lseq = (mgen & PTHRW_COUNT_MASK);
+       uint32_t updatebits = 0;
+       thread_t tid_th = THREAD_NULL, old_owner = THREAD_NULL;
 
-               if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
-                       /* increase the next expected seq by one */
-                       kwq->kw_nextseqword += PTHRW_INC;
-                       /* set count by one & bits from the nextseq and add M bit */
-                       *retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
-                       res = 1;
-               }
+       if (firstfit) {
+               /* first fit */
+               ins_flags = FIRSTFIT;
        }
-       return res;
-}
 
-static int
-_ksyn_handle_prepost(ksyn_wait_queue_t kwq,
-                    uint32_t type,
-                    uint32_t lockseq,
-                    uint32_t *retval)
-{
-       int res = 0;
-       if (kwq->kw_pre_rwwc != 0 && is_seqlower_eq(lockseq, kwq->kw_pre_lockseq)) {
-               kwq->kw_pre_rwwc--;
-               if (kwq->kw_pre_rwwc == 0) {
-                       uint32_t preseq = kwq->kw_pre_lockseq;
-                       uint32_t prerw_wc = kwq->kw_pre_sseq;
-                       CLEAR_PREPOST_BITS(kwq);
-                       if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
-                               kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
-                       }
+       error = ksyn_wqfind(mutex, mgen, ugen, 0, flags,
+                       (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX), &kwq);
+       if (error != 0) {
+               return error;
+       }
 
-                       int error, block;
-                       uint32_t updatebits;
-                       error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (type|KW_UNLOCK_PREPOST), &block, lockseq);
-                       if (error != 0) {
-                               panic("kwq_handle_unlock failed %d\n", error);
-                       }
+again:
+       ksyn_wqlock(kwq);
 
-                       if (block == 0) {
-                               *retval = updatebits;
-                               res = 1;
-                       }
-               }
+       if (_kwq_handle_interrupted_wakeup(kwq, KWQ_INTR_WRITE, lseq, retval)) {
+               old_owner = _kwq_set_owner(kwq, current_thread(), 0);
+               pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner,
+                               &kwq->kw_turnstile);
+               ksyn_wqunlock(kwq);
+               _kwq_cleanup_old_owner(&old_owner);
+               goto out;
        }
-       return res;
-}
 
-/* Helpers for QoS override management. Only applies to mutexes */
-static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t kwq, uint64_t tid, boolean_t prepost)
-{
-       if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
-               boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
-               int waiter_qos = pthread_kern->proc_usynch_get_requested_thread_qos(current_uthread());
-               
-               kwq->kw_qos_override = MAX(waiter_qos, kwq->kw_qos_override);
-               
-               if (prepost && kwq->kw_inqueue == 0) {
-                       // if there are no more waiters in the queue after the new (prepost-receiving) owner, we do not set an
-                       // override, because the receiving owner may not re-enter the kernel to signal someone else if it is
-                       // the last one to unlock. If other waiters end up entering the kernel, they will boost the owner
-                       tid = 0;
-               }
-               
-               if (tid != 0) {
-                       if ((tid == kwq->kw_owner) && (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED)) {
-                               // hint continues to be accurate, and a boost was already applied
-                               pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, FALSE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-                       } else {
-                               // either hint did not match previous owner, or hint was accurate but mutex was not contended enough for a boost previously
-                               boolean_t boostsucceded;
-                               
-                               boostsucceded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-                               
-                               if (boostsucceded) {
-                                       kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
-                               }
-
-                               if (wasboosted && (tid != kwq->kw_owner) && (kwq->kw_owner != 0)) {
-                                       // the hint did not match the previous owner, so drop overrides
-                                       PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
-                                       pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-                               }
-                       }
-               } else {
-                       // new hint tells us that we don't know the owner, so drop any existing overrides
-                       kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
-                       kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
-
-                       if (wasboosted && (kwq->kw_owner != 0)) {
-                               // the hint did not match the previous owner, so drop overrides
-                               PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
-                               pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
+       if (kwq->kw_prepost.count && (firstfit || (lseq == kwq->kw_prepost.lseq))) {
+               /* got preposted lock */
+               kwq->kw_prepost.count--;
+
+               if (!firstfit) {
+                       if (kwq->kw_prepost.count > 0) {
+                               __FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
+                               kwq->kw_prepost.lseq += PTHRW_INC; /* look for next one */
+                               ksyn_wqunlock(kwq);
+                               error = EINVAL;
+                               goto out;
                        }
+                       _kwq_clear_preposted_wakeup(kwq);
                }
-       }
-}
 
-static boolean_t
-ksyn_mtx_transfer_qos_override_begin(ksyn_wait_queue_t kwq,
-               ksyn_waitq_element_t kwe, uint64_t *kw_owner)
-{
-       boolean_t needs_commit = FALSE;
-       if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
-               boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
-
-               if (kwq->kw_inqueue > 1) {
-                       boolean_t boostsucceeded;
-
-                       // More than one waiter, so resource will still be contended after handing off ownership
-                       boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), kwe->kwe_uth, 0, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-
-                       if (boostsucceeded) {
-                               kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
-                       }
+               if (kwq->kw_inqueue == 0) {
+                       updatebits = lseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
                } else {
-                       // kw_inqueue == 1 to get to this point, which means there will be no contention after this point
-                       kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
-                       kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
-               }
-
-               // Remove the override that was applied to kw_owner. There may have been a race,
-               // in which case it may not match the current thread
-               if (wasboosted) {
-                       if (kwq->kw_owner == 0) {
-                               PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
-                       } else if (thread_tid(current_thread()) != kwq->kw_owner) {
-                               PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
-                               *kw_owner = kwq->kw_owner;
-                               needs_commit = TRUE;
-                       } else {
-                               *kw_owner = 0;
-                               needs_commit = TRUE;
-                       }
+                       updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) |
+                                       (PTH_RWL_KBIT | PTH_RWL_EBIT);
                }
-       }
-       return needs_commit;
-}
-
-static void
-ksyn_mtx_transfer_qos_override_commit(ksyn_wait_queue_t kwq, uint64_t kw_owner)
-{
-       struct uthread *uthread = kw_owner ? NULL : current_uthread();
-
-       pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(
-                       current_task(), uthread, kw_owner, kwq->kw_addr,
-                       THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-}
+               updatebits &= ~PTH_RWL_MTX_WAIT;
 
-static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t kwq)
-{
-       if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
-               boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
-               
-               // assume nobody else in queue if this routine was called
-               kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
-               kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
-               
-               // Remove the override that was applied to kw_owner. There may have been a race,
-               // in which case it may not match the current thread
-               if (wasboosted) {
-                       if (kwq->kw_owner == 0) {
-                               PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
-                       } else if (thread_tid(current_thread()) != kwq->kw_owner) {
-                               PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
-                               pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-                       } else {
-                               pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), current_uthread(), 0, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
-                       }
+               if (updatebits == 0) {
+                       __FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT\n");
                }
-       }
-}
 
-/*
- * psynch_mutexwait: This system call is used for contended psynch mutexes to block.
- */
+               PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
+                               kwq->kw_prepost.lseq, kwq->kw_prepost.count, 1);
 
-int
-_psynch_mutexwait(__unused proc_t p,
-                 user_addr_t mutex,
-                 uint32_t mgen,
-                 uint32_t ugen,
-                 uint64_t tid,
-                 uint32_t flags,
-                 uint32_t *retval)
-{
-       ksyn_wait_queue_t kwq;
-       int error=0;
-       int ins_flags;
-
-       int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
-       uint32_t updatebits = 0;
-
-       uint32_t lockseq = (mgen & PTHRW_COUNT_MASK);
-       
-       if (firstfit == 0) {
-               ins_flags = SEQFIT;
-       } else {
-               /* first fit */
-               ins_flags = FIRSTFIT;
-       }
-       
-       error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX), &kwq);
-       if (error != 0) {
-               return(error);
+               old_owner = _kwq_set_owner(kwq, current_thread(), 0);
+               pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner,
+                               &kwq->kw_turnstile);
+               
+               ksyn_wqunlock(kwq);
+               _kwq_cleanup_old_owner(&old_owner);
+               *retval = updatebits;
+               goto out;
        }
-       
-       ksyn_wqlock(kwq);
 
-       // mutexwait passes in an owner hint at the time userspace contended for the mutex, however, the
-       // owner tid in the userspace data structure may be unset or SWITCHING (-1), or it may correspond
-       // to a stale snapshot after the lock has subsequently been unlocked by another thread.
-       if (tid == 0) {
+       // mutexwait passes in an owner hint at the time userspace contended for
+       // the mutex; however, the owner tid in the userspace data structure may be
+       // unset or SWITCHING (-1), or it may correspond to a stale snapshot after
+       // the lock has subsequently been unlocked by another thread.
+       if (tid == thread_tid(kwq->kw_owner)) {
+               // userspace and kernel agree
+       } else if (tid == 0) {
                // contender came in before owner could write TID
-               tid = 0;
-       } else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && is_seqlower(ugen, kwq->kw_lastunlockseq)) {
-               // owner is stale, someone has come in and unlocked since this contended read the TID, so
-               // assume what is known in the kernel is accurate
-               tid = kwq->kw_owner;
+               // let's assume that what the kernel knows is accurate
+               // for all we know this waiter came in late in the kernel
+       } else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT &&
+                          is_seqlower(ugen, kwq->kw_lastunlockseq)) {
+               // owner is stale, someone has come in and unlocked since this
+               // contended read the TID, so assume what is known in the kernel is
+               // accurate
        } else if (tid == PTHREAD_MTX_TID_SWITCHING) {
-               // userspace didn't know the owner because it was being unlocked, but that unlocker hasn't
-               // reached the kernel yet. So assume what is known in the kernel is accurate
-               tid = kwq->kw_owner;
+               // userspace didn't know the owner because it was being unlocked, but
+               // that unlocker hasn't reached the kernel yet. So assume what is known
+               // in the kernel is accurate
        } else {
-               // hint is being passed in for a specific thread, and we have no reason not to trust
-               // it (like the kernel unlock sequence being higher
-       }
-
-       
-       if (_ksyn_handle_missed_wakeups(kwq, PTH_RW_TYPE_WRITE, lockseq, retval)) {
-               ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
-               kwq->kw_owner = thread_tid(current_thread());
-
-               ksyn_wqunlock(kwq);
-               goto out;
-       }
-       
-       if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK) ))) {
-               /* got preposted lock */
-               kwq->kw_pre_rwwc--;
-               if (kwq->kw_pre_rwwc == 0) {
-                       CLEAR_PREPOST_BITS(kwq);
-                       if (kwq->kw_inqueue == 0) {
-                               updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
-                       } else {
-                               updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT);
-                       }
-                       updatebits &= ~PTH_RWL_MTX_WAIT;
-                       
-                       if (updatebits == 0) {
-                               __FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT \n");
-                       }
-                       
-                       ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
-                       kwq->kw_owner = thread_tid(current_thread());
-            
-                       ksyn_wqunlock(kwq);
-                       *retval = updatebits;
-                       goto out;
-               } else {
-                       __FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
-                       kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */
+               // hint is being passed in for a specific thread, and we have no reason
+               // not to trust it (like the kernel unlock sequence being higher)
+               //
+               // So resolve the hint to a thread_t if we haven't done so yet
+               // and redrive as we dropped the lock
+               if (tid_th == THREAD_NULL) {
                        ksyn_wqunlock(kwq);
-                       error = EINVAL;
-                       goto out;
+                       tid_th = pthread_kern->task_findtid(current_task(), tid);
+                       if (tid_th == THREAD_NULL) tid = 0;
+                       goto again;
                }
+               tid_th = _kwq_set_owner(kwq, tid_th, KWQ_SET_OWNER_TRANSFER_REF);
        }
-       
-       ksyn_mtx_update_owner_qos_override(kwq, tid, FALSE);
-       kwq->kw_owner = tid;
 
-       error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, mgen, ins_flags, 0, psynch_mtxcontinue, kThreadWaitPThreadMutex);
+       if (tid_th) {
+               // We are on our way to block, and can't drop the spinlock anymore
+               pthread_kern->thread_deallocate_safe(tid_th);
+               tid_th = THREAD_NULL;
+       }
+       error = ksyn_wait(kwq, KSYN_QUEUE_WRITE, mgen, ins_flags, 0, 0,
+                       psynch_mtxcontinue, kThreadWaitPThreadMutex);
        // ksyn_wait drops wait queue lock
 out:
-       ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
+       pthread_kern->psynch_wait_cleanup();
+       ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
+       if (tid_th) {
+               thread_deallocate(tid_th);
+       }
        return error;
 }
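
The rewritten hint handling above trusts the kernel's notion of the owner unless the userspace tid survives all four checks. Below is a minimal userland sketch of the same decision chain, assuming simplified scalar types and a half-range wraparound compare standing in for the real is_seqlower(); the hypothetical resolve_owner_hint() also skips the tid-to-thread_t resolution the kernel performs.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MTX_TID_SWITCHING ((uint64_t)-1) /* stand-in for PTHREAD_MTX_TID_SWITCHING */

    /* assumed wraparound compare: true when a precedes b in half-range order */
    static bool seq_lower(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }

    /* Decide which owner tid to believe: the userspace hint or the kernel's. */
    static uint64_t
    resolve_owner_hint(uint64_t hint, uint64_t kernel_owner, uint32_t ugen,
            uint32_t lastunlockseq, uint32_t rwl_init)
    {
        if (hint == kernel_owner) return kernel_owner; /* both views agree */
        if (hint == 0) return kernel_owner;            /* contender raced the TID store */
        if (lastunlockseq != rwl_init && seq_lower(ugen, lastunlockseq))
            return kernel_owner;                       /* hint is a stale snapshot */
        if (hint == MTX_TID_SWITCHING)
            return kernel_owner;                       /* unlocker not in the kernel yet */
        return hint;                                   /* no reason to distrust userspace */
    }

    int main(void) {
        printf("%llu\n", (unsigned long long)resolve_owner_hint(42, 7, 5, 9, 0));
        return 0;
    }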
 
-void
+void __dead2
 psynch_mtxcontinue(void *parameter, wait_result_t result)
 {
        uthread_t uth = current_uthread();
        ksyn_wait_queue_t kwq = parameter;
        ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
-       
+
+       ksyn_wqlock(kwq);
+
        int error = _wait_result_to_errno(result);
        if (error != 0) {
-               ksyn_wqlock(kwq);
                if (kwe->kwe_kwqqueue) {
-                       ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
+                       ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe);
                }
-               ksyn_wqunlock(kwq);
        } else {
                uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT;
                pthread_kern->uthread_set_returnval(uth, updatebits);
-               
-               if (updatebits == 0)
+
+               if (updatebits == 0) {
                        __FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT \n");
+               }
        }
-       ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
+
+       pthread_kern->psynch_wait_complete(kwq, &kwq->kw_turnstile);
+
+       ksyn_wqunlock(kwq);
+       pthread_kern->psynch_wait_cleanup();
+       ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
        pthread_kern->unix_syscall_return(error);
+       __builtin_unreachable();
+}
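
psynch_mtxcontinue is now annotated __dead2 and ends in __builtin_unreachable() because unix_syscall_return() never returns to its caller. A toy sketch of that contract, assuming Clang/GCC (where Apple's __dead2 expands to the noreturn attribute in <sys/cdefs.h>):

    #include <stdio.h>
    #include <stdlib.h>

    /* Apple's __dead2 expands to this attribute. */
    #define DEAD2 __attribute__((__noreturn__))

    static void DEAD2
    finish(int code)
    {
        printf("exiting with %d\n", code);
        exit(code);              /* never returns, satisfying the attribute */
        __builtin_unreachable(); /* silences "function may return" diagnostics */
    }

    int main(void)
    {
        finish(0);
        /* the compiler knows control cannot reach this point */
    }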
+
+static void __dead2
+_psynch_rw_continue(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi,
+               wait_result_t result)
+{
+       uthread_t uth = current_uthread();
+       ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
+
+       ksyn_wqlock(kwq);
+
+       int error = _wait_result_to_errno(result);
+       if (error != 0) {
+               if (kwe->kwe_kwqqueue) {
+                       ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
+               }
+       } else {
+               pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval);
+       }
+
+       ksyn_wqunlock(kwq);
+       ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));
+
+       pthread_kern->unix_syscall_return(error);
+       __builtin_unreachable();
+}
+
+void __dead2
+psynch_rw_rdcontinue(void *parameter, wait_result_t result)
+{
+       _psynch_rw_continue(parameter, KSYN_QUEUE_READ, result);
+}
+
+void __dead2
+psynch_rw_wrcontinue(void *parameter, wait_result_t result)
+{
+       _psynch_rw_continue(parameter, KSYN_QUEUE_WRITE, result);
 }
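
The read and write paths now share one continuation body, parameterized by the queue to operate on. The same shape in a standalone sketch with hypothetical names:

    #include <stdio.h>

    typedef enum { QUEUE_READ, QUEUE_WRITE } queue_type_t;

    /* one shared worker instead of two near-identical continuations */
    static void rw_continue(queue_type_t q, int result) {
        printf("resume %s waiter, result %d\n",
                q == QUEUE_READ ? "read" : "write", result);
    }

    static void rd_continue(int result) { rw_continue(QUEUE_READ, result); }
    static void wr_continue(int result) { rw_continue(QUEUE_WRITE, result); }

    int main(void) { rd_continue(0); wr_continue(0); return 0; }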
 
 /*
  * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
  */
 int
-_psynch_mutexdrop(__unused proc_t p,
-                 user_addr_t mutex,
-                 uint32_t mgen,
-                 uint32_t ugen,
-                 uint64_t tid __unused,
-                 uint32_t flags,
-                 uint32_t *retval)
+_psynch_mutexdrop(__unused proc_t p, user_addr_t mutex, uint32_t mgen,
+               uint32_t ugen, uint64_t tid __unused, uint32_t flags, uint32_t *retval)
 {
        int res;
        ksyn_wait_queue_t kwq;
-       
+
        res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq);
        if (res == 0) {
                uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
@@ -821,65 +873,57 @@ _psynch_mutexdrop(__unused proc_t p,
 }
 
 static kern_return_t
-ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t updateval)
+ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe,
+               uint32_t updateval, thread_t *old_owner)
 {
        kern_return_t ret;
-       boolean_t needs_commit;
-       uint64_t kw_owner;
 
        if (!kwe) {
-               kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_kwelist);
+               kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_kwelist);
                if (!kwe) {
                        panic("ksyn_mtxsignal: panic signaling empty queue");
                }
        }
 
-       needs_commit = ksyn_mtx_transfer_qos_override_begin(kwq, kwe, &kw_owner);
-       kwq->kw_owner = kwe->kwe_tid;
-
-       ret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, kwe, updateval);
+       PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_START, kwq->kw_addr, kwe,
+                       thread_tid(kwe->kwe_thread), kwq->kw_inqueue);
 
-       // if waking the new owner failed, remove any overrides
-       if (ret != KERN_SUCCESS) {
-               ksyn_mtx_drop_qos_override(kwq);
-               kwq->kw_owner = 0;
-       } else if (needs_commit) {
-               ksyn_mtx_transfer_qos_override_commit(kwq, kw_owner);
+       ret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, kwe, updateval);
+       if (ret == KERN_SUCCESS) {
+               *old_owner = _kwq_set_owner(kwq, kwe->kwe_thread, 0);
+       } else {
+               *old_owner = _kwq_clear_owner(kwq);
        }
+       PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_END, kwq->kw_addr, kwe,
+                       ret, 0);
        return ret;
 }
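
ksyn_mtxsignal() now reports the displaced owner through old_owner so the caller can release that reference only after the kwq spinlock is dropped. A sketch of the swap-and-return-old-reference pattern, with hypothetical types:

    #include <stdio.h>

    typedef struct thread { const char *name; } thread_t;

    struct kwq { thread_t *owner; };

    /* Install a new owner, handing the old reference back to the caller. */
    static thread_t *
    kwq_set_owner(struct kwq *kwq, thread_t *new_owner)
    {
        thread_t *old = kwq->owner;
        kwq->owner = new_owner;
        return old; /* caller releases this after dropping the kwq lock */
    }

    int main(void)
    {
        thread_t a = { "a" }, b = { "b" };
        struct kwq kwq = { &a };
        thread_t *old = kwq_set_owner(&kwq, &b);
        printf("old owner: %s, new owner: %s\n", old->name, kwq.owner->name);
        return 0;
    }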
 
 
 static void
-ksyn_prepost(ksyn_wait_queue_t kwq,
-            ksyn_waitq_element_t kwe,
-            uint32_t state,
+ksyn_prepost(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t state,
             uint32_t lockseq)
 {
        bzero(kwe, sizeof(*kwe));
        kwe->kwe_state = state;
        kwe->kwe_lockseq = lockseq;
        kwe->kwe_count = 1;
-       
-       (void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITER, kwe, lockseq, SEQFIT);
+
+       (void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITE, kwe, lockseq, SEQFIT);
        kwq->kw_fakecount++;
 }
 
 static void
-ksyn_cvsignal(ksyn_wait_queue_t ckwq,
-             thread_t th,
-             uint32_t uptoseq,
-             uint32_t signalseq,
-             uint32_t *updatebits,
-             int *broadcast,
-             ksyn_waitq_element_t *nkwep)
+ksyn_cvsignal(ksyn_wait_queue_t ckwq, thread_t th, uint32_t uptoseq,
+               uint32_t signalseq, uint32_t *updatebits, int *broadcast,
+               ksyn_waitq_element_t *nkwep)
 {
        ksyn_waitq_element_t kwe = NULL;
        ksyn_waitq_element_t nkwe = NULL;
-       ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
-       
+       ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];
+
        uptoseq &= PTHRW_COUNT_MASK;
-       
+
        // Find the specified thread to wake.
        if (th != THREAD_NULL) {
                uthread_t uth = pthread_kern->get_bsdthread_info(th);
@@ -893,7 +937,7 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq,
                        return;
                }
        }
-       
+
        // If no thread was specified, find any thread to wake (with the right
        // sequence number).
        while (th == THREAD_NULL) {
@@ -906,13 +950,13 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq,
                        // reacquiring the lock after allocation in
                        // case anything new shows up.
                        ksyn_wqunlock(ckwq);
-                       nkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
+                       nkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
                        ksyn_wqlock(ckwq);
                } else {
                        break;
                }
        }
-       
+
        if (kwe != NULL) {
                // If we found a thread to wake...
                if (kwe->kwe_state == KWE_THREAD_INWAIT) {
@@ -928,7 +972,7 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq,
                                 */
                                *broadcast = 1;
                        } else {
-                               (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
+                               (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT);
                                *updatebits += PTHRW_INC;
                        }
                } else if (kwe->kwe_state == KWE_THREAD_PREPOST) {
@@ -944,7 +988,7 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq,
                         * If we allocated a new kwe above but then found a different kwe to
                         * use then we need to deallocate the spare one.
                         */
-                       pthread_kern->zfree(kwe_zone, nkwe);
+                       zfree(kwe_zone, nkwe);
                        nkwe = NULL;
                }
        } else if (nkwe != NULL) {
@@ -954,19 +998,14 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq,
        } else {
                panic("failed to allocate kwe\n");
        }
-       
+
        *nkwep = nkwe;
 }
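
ksyn_cvsignal() cannot allocate while holding the wait-queue lock, so it drops the lock, calls zalloc(), retakes the lock, and rechecks, freeing the spare if somebody else satisfied the need in the meantime. A minimal pthread analog of that pattern:

    #include <pthread.h>
    #include <stdlib.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int *shared;  /* the state we may need to populate */

    static void ensure_allocated(void)
    {
        int *spare = NULL;

        pthread_mutex_lock(&lock);
        while (shared == NULL) {
            if (spare != NULL) {            /* retry with the spare we brought */
                shared = spare;
                spare = NULL;
                break;
            }
            pthread_mutex_unlock(&lock);    /* can't allocate under the lock */
            spare = malloc(sizeof(*spare));
            pthread_mutex_lock(&lock);      /* recheck: someone may have won the race */
        }
        pthread_mutex_unlock(&lock);

        free(spare);                        /* unused spare, if we lost the race */
    }

    int main(void) { ensure_allocated(); printf("%p\n", (void *)shared); return 0; }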
 
 static int
-__psynch_cvsignal(user_addr_t cv,
-                 uint32_t cgen,
-                 uint32_t cugen,
-                 uint32_t csgen,
-                 uint32_t flags,
-                 int broadcast,
-                 mach_port_name_t threadport,
-                 uint32_t *retval)
+__psynch_cvsignal(user_addr_t cv, uint32_t cgen, uint32_t cugen,
+               uint32_t csgen, uint32_t flags, int broadcast,
+               mach_port_name_t threadport, uint32_t *retval)
 {
        int error = 0;
        thread_t th = THREAD_NULL;
@@ -997,11 +1036,16 @@ __psynch_cvsignal(user_addr_t cv,
                
                // update L, U and S...
                UPDATE_CVKWQ(kwq, cgen, cugen, csgen);
-               
+
+               PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_START, kwq->kw_addr,
+                               fromseq, uptoseq, broadcast);
+
                if (!broadcast) {
                        // No need to signal if the CV is already balanced.
                        if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) {
-                               ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, &broadcast, &nkwe);
+                               ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits,
+                                               &broadcast, &nkwe);
+                               PTHREAD_TRACE(psynch_cvar_signal, kwq->kw_addr, broadcast, 0, 0);
                        }
                }
                
@@ -1013,11 +1057,16 @@ __psynch_cvsignal(user_addr_t cv,
                // set C or P bits and free if needed
                ksyn_cvupdate_fixup(kwq, &updatebits);
                *retval = updatebits;
+
+               PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_END, kwq->kw_addr,
+                               updatebits, 0, 0);
                
                ksyn_wqunlock(kwq);
+
+               pthread_kern->psynch_wait_cleanup();
                
                if (nkwe != NULL) {
-                       pthread_kern->zfree(kwe_zone, nkwe);
+                       zfree(kwe_zone, nkwe);
                }
                
                ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
@@ -1034,15 +1083,9 @@ __psynch_cvsignal(user_addr_t cv,
  * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars.
  */
 int
-_psynch_cvbroad(__unused proc_t p,
-               user_addr_t cv,
-               uint64_t cvlsgen,
-               uint64_t cvudgen,
-               uint32_t flags,
-               __unused user_addr_t mutex,
-               __unused uint64_t mugen,
-               __unused uint64_t tid,
-               uint32_t *retval)
+_psynch_cvbroad(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
+               uint64_t cvudgen, uint32_t flags, __unused user_addr_t mutex,
+               __unused uint64_t mugen, __unused uint64_t tid, uint32_t *retval)
 {
        uint32_t diffgen = cvudgen & 0xffffffff;
        uint32_t count = diffgen >> PTHRW_COUNT_SHIFT;
@@ -1062,15 +1105,9 @@ _psynch_cvbroad(__unused proc_t p,
  * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars.
  */
 int
-_psynch_cvsignal(__unused proc_t p,
-                user_addr_t cv,
-                uint64_t cvlsgen,
-                uint32_t cvugen,
-                int threadport,
-                __unused user_addr_t mutex,
-                __unused uint64_t mugen,
-                __unused uint64_t tid,
-                uint32_t flags,
+_psynch_cvsignal(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
+                uint32_t cvugen, int threadport, __unused user_addr_t mutex,
+                __unused uint64_t mugen, __unused uint64_t tid, uint32_t flags,
                 uint32_t *retval)
 {
        uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
@@ -1083,16 +1120,9 @@ _psynch_cvsignal(__unused proc_t p,
  * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel.
  */
 int
-_psynch_cvwait(__unused proc_t p,
-              user_addr_t cv,
-              uint64_t cvlsgen,
-              uint32_t cvugen,
-              user_addr_t mutex,
-              uint64_t mugen,
-              uint32_t flags,
-              int64_t sec,
-              uint32_t nsec,
-              uint32_t *retval)
+_psynch_cvwait(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
+               uint32_t cvugen, user_addr_t mutex, uint64_t mugen, uint32_t flags,
+               int64_t sec, uint32_t nsec, uint32_t *retval)
 {
        int error = 0;
        uint32_t updatebits = 0;
@@ -1118,6 +1148,8 @@ _psynch_cvwait(__unused proc_t p,
                __FAILEDUSERTEST__("psync_cvwait; invalid sequence numbers\n");
                return EINVAL;
        }
+
+       PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_START, cv, mutex, cgen, 0);
        
        error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
        if (error != 0) {
@@ -1125,7 +1157,8 @@ _psynch_cvwait(__unused proc_t p,
        }
        
        if (mutex != 0) {
-               error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, NULL);
+               uint32_t mutexrv = 0;
+               error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, &mutexrv);
                if (error != 0) {
                        goto out;
                }
@@ -1137,7 +1170,7 @@ _psynch_cvwait(__unused proc_t p,
        UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen);
        
        /* Look for the sequence for prepost (or a conflicting thread) */
-       ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
+       ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];
        kwe = ksyn_queue_find_cvpreposeq(kq, lockseq);
        if (kwe != NULL) {
                if (kwe->kwe_state == KWE_THREAD_PREPOST) {
@@ -1171,7 +1204,7 @@ _psynch_cvwait(__unused proc_t p,
                }
                
                if (error == 0) {
-                       updatebits = PTHRW_INC;
+                       updatebits |= PTHRW_INC;
                        ckwq->kw_sword += PTHRW_INC;
                        
                        /* set C or P bits and free if needed */
@@ -1180,45 +1213,54 @@ _psynch_cvwait(__unused proc_t p,
                }
        } else {
                uint64_t abstime = 0;
+               uint16_t kwe_flags = 0;
 
                if (sec != 0 || (nsec & 0x3fffffff) != 0) {
                        struct timespec ts;
                        ts.tv_sec = (__darwin_time_t)sec;
                        ts.tv_nsec = (nsec & 0x3fffffff);
-                       nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime);
+                       nanoseconds_to_absolutetime(
+                                       (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime);
                        clock_absolutetime_interval_to_deadline(abstime, &abstime);
                }
+
+               PTHREAD_TRACE(psynch_cvar_kwait, cv, mutex, kwe_flags, 1);
                
-               error = ksyn_wait(ckwq, KSYN_QUEUE_WRITER, cgen, SEQFIT, abstime, psynch_cvcontinue, kThreadWaitPThreadCondVar);
+               error = ksyn_wait(ckwq, KSYN_QUEUE_WRITE, cgen, SEQFIT, abstime,
+                               kwe_flags, psynch_cvcontinue, kThreadWaitPThreadCondVar);
                // ksyn_wait drops wait queue lock
        }
        
        ksyn_wqunlock(ckwq);
-       
+
        if (nkwe != NULL) {
-               pthread_kern->zfree(kwe_zone, nkwe);
+               zfree(kwe_zone, nkwe);
        }
 out:
+
+       PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, cv, error, updatebits, 2);
+
        ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
        return error;
 }
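
_psynch_cvwait() masks nsec with 0x3fffffff (the upper bits carry flags) and turns the relative timespec into an absolute deadline via nanoseconds_to_absolutetime() and clock_absolutetime_interval_to_deadline(). A userland sketch of the same two steps against CLOCK_MONOTONIC, keeping the nsec mask:

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    /* Relative (sec, nsec-with-flag-bits) -> absolute monotonic deadline in ns. */
    static uint64_t
    relative_to_deadline(int64_t sec, uint32_t nsec)
    {
        uint64_t rel_ns = (uint64_t)sec * 1000000000ull + (nsec & 0x3fffffff);

        struct timespec now;
        clock_gettime(CLOCK_MONOTONIC, &now);
        uint64_t now_ns = (uint64_t)now.tv_sec * 1000000000ull + (uint64_t)now.tv_nsec;

        return now_ns + rel_ns; /* analog of ...interval_to_deadline() */
    }

    int main(void)
    {
        printf("deadline: %llu\n", (unsigned long long)relative_to_deadline(1, 500000000));
        return 0;
    }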
 
 
-void
+void __dead2
 psynch_cvcontinue(void *parameter, wait_result_t result)
 {
        uthread_t uth = current_uthread();
        ksyn_wait_queue_t ckwq = parameter;
        ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
-       
+
        int error = _wait_result_to_errno(result);
        if (error != 0) {
                ksyn_wqlock(ckwq);
                /* just in case it got woken up as we were granting */
-               pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval);
+               int retval = kwe->kwe_psynchretval;
+               pthread_kern->uthread_set_returnval(uth, retval);
 
                if (kwe->kwe_kwqqueue) {
-                       ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
+                       ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe);
                }
                if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
                        /* the condition var granted.
@@ -1231,46 +1273,48 @@ psynch_cvcontinue(void *parameter, wait_result_t result)
                        
                        /* set C and P bits, in the local error */
                        if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
-                               error |= ECVCERORR;
+                               PTHREAD_TRACE(psynch_cvar_zeroed, ckwq->kw_addr,
+                                               ckwq->kw_lword, ckwq->kw_sword, ckwq->kw_inqueue);
+                               error |= ECVCLEARED;
                                if (ckwq->kw_inqueue != 0) {
-                                       ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 1);
+                                       ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 1);
                                }
                                ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
                                ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
                        } else {
                                /* everything in the queue is a fake entry? */
                                if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
-                                       error |= ECVPERORR;
+                                       error |= ECVPREPOST;
                                }
                        }
                }
                ksyn_wqunlock(ckwq);
+
+               PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr,
+                               error, 0, 3);
        } else {
                int val = 0;
                // PTH_RWL_MTX_WAIT is removed
                if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) {
                        val = PTHRW_INC | PTH_RWS_CV_CBIT;
                }
+               PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr,
+                               val, 0, 4);
                pthread_kern->uthread_set_returnval(uth, val);
        }
        
        ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
        pthread_kern->unix_syscall_return(error);
+       __builtin_unreachable();
 }
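
psynch_cvcontinue() ORs bookkeeping results into the error it returns: ECVCLEARED when the condvar's counters zeroed out, ECVPREPOST when only fake entries remain queued. A sketch of carrying such status flags alongside an errno-style value; the flag values below are hypothetical stand-ins, not the real constants:

    #include <stdio.h>

    /* hypothetical stand-ins for ECVCLEARED / ECVPREPOST */
    #define CV_CLEARED  0x100
    #define CV_PREPOST  0x200
    #define ERR_MASK    0x0ff

    static int wait_result(int base_error, int cleared, int prepost)
    {
        int error = base_error;
        if (cleared) error |= CV_CLEARED;  /* CV counters were zeroed */
        if (prepost) error |= CV_PREPOST;  /* queue held only fake entries */
        return error;
    }

    int main(void)
    {
        int e = wait_result(4 /* EINTR-like */, 1, 0);
        printf("errno part: %d, cleared: %d\n", e & ERR_MASK, !!(e & CV_CLEARED));
        return 0;
    }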
 
 /*
  * psynch_cvclrprepost: This system call clears pending prepost if present.
  */
 int
-_psynch_cvclrprepost(__unused proc_t p,
-                    user_addr_t cv,
-                    uint32_t cvgen,
-                    uint32_t cvugen,
-                    uint32_t cvsgen,
-                    __unused uint32_t prepocnt,
-                    uint32_t preposeq,
-                    uint32_t flags,
-                    int *retval)
+_psynch_cvclrprepost(__unused proc_t p, user_addr_t cv, uint32_t cvgen,
+               uint32_t cvugen, uint32_t cvsgen, __unused uint32_t prepocnt,
+               uint32_t preposeq, uint32_t flags, int *retval)
 {
        int error = 0;
        int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX);
@@ -1279,7 +1323,8 @@ _psynch_cvclrprepost(__unused proc_t p,
        
        *retval = 0;
        
-       error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype, &kwq);
+       error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype,
+                       &kwq);
        if (error != 0) {
                return error;
        }
@@ -1287,16 +1332,19 @@ _psynch_cvclrprepost(__unused proc_t p,
        ksyn_wqlock(kwq);
        
        if (mutex) {
-               int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
-               if (firstfit && kwq->kw_pre_rwwc != 0) {
-                       if (is_seqlower_eq(kwq->kw_pre_lockseq, cvgen)) {
-                               // clear prepost
-                               kwq->kw_pre_rwwc = 0;
-                               kwq->kw_pre_lockseq = 0;
+               int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK)
+                               == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
+               if (firstfit && kwq->kw_prepost.count) {
+                       if (is_seqlower_eq(kwq->kw_prepost.lseq, cvgen)) {
+                               PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
+                                               kwq->kw_prepost.lseq, 0, 2);
+                               _kwq_clear_preposted_wakeup(kwq);
                        }
                }
        } else {
-               ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITER, preposeq, 0);
+               PTHREAD_TRACE(psynch_cvar_clrprepost, kwq->kw_addr, wqtype,
+                               preposeq, 0);
+               ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITE, preposeq, 0);
        }
        
        ksyn_wqunlock(kwq);
@@ -1307,50 +1355,47 @@ _psynch_cvclrprepost(__unused proc_t p,
 /* ***************** pthread_rwlock ************************ */
 
 static int
-__psynch_rw_lock(int type,
-                user_addr_t rwlock,
-                uint32_t lgenval,
-                uint32_t ugenval,
-                uint32_t rw_wc,
-                int flags,
-                uint32_t *retval)
+__psynch_rw_lock(int type, user_addr_t rwlock, uint32_t lgenval,
+                uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
 {
-       int prepost_type, kqi;
+       uint32_t lockseq = lgenval & PTHRW_COUNT_MASK;
+       ksyn_wait_queue_t kwq;
+       int error, prepost_type, kqi;
+       thread_continue_t tc;
 
        if (type == PTH_RW_TYPE_READ) {
                prepost_type = KW_UNLOCK_PREPOST_READLOCK;
                kqi = KSYN_QUEUE_READ;
+               tc = psynch_rw_rdcontinue;
        } else {
                prepost_type = KW_UNLOCK_PREPOST_WRLOCK;
-               kqi = KSYN_QUEUE_WRITER;
+               kqi = KSYN_QUEUE_WRITE;
+               tc = psynch_rw_wrcontinue;
        }
 
-       uint32_t lockseq = lgenval & PTHRW_COUNT_MASK;
+       error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags,
+                       (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq);
+       if (error != 0) {
+               return error;
+       }
 
-       int error;
-       ksyn_wait_queue_t kwq;
-       error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
-       if (error == 0) {
-               ksyn_wqlock(kwq);
-               _ksyn_check_init(kwq, lgenval);
-               if (_ksyn_handle_missed_wakeups(kwq, type, lockseq, retval) ||
-                   // handle overlap first as they are not counted against pre_rwwc
-                   (type == PTH_RW_TYPE_READ && _ksyn_handle_overlap(kwq, lgenval, rw_wc, retval)) ||
-                   _ksyn_handle_prepost(kwq, prepost_type, lockseq, retval)) {
-                       ksyn_wqunlock(kwq);
-               } else {
-                       block_hint_t block_hint = type == PTH_RW_TYPE_READ ?
-                               kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite;
-                       error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, THREAD_CONTINUE_NULL, block_hint);
-                       // ksyn_wait drops wait queue lock
-                       if (error == 0) {
-                               uthread_t uth = current_uthread();
-                               ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
-                               *retval = kwe->kwe_psynchretval;
-                       }
-               }
-               ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK));
+       ksyn_wqlock(kwq);
+       _ksyn_check_init(kwq, lgenval);
+       if (_kwq_handle_interrupted_wakeup(kwq, type, lockseq, retval) ||
+                       // handle overlap first, as overlaps are not counted against pre_rwwc
+                       // handle_overlap uses the flags in lgenval (vs. lockseq)
+                       _kwq_handle_overlap(kwq, type, lgenval, rw_wc, retval) ||
+                       _kwq_handle_preposted_wakeup(kwq, prepost_type, lockseq, retval)) {
+               ksyn_wqunlock(kwq);
+               goto out;
        }
+
+       block_hint_t block_hint = type == PTH_RW_TYPE_READ ?
+               kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite;
+       error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, 0, tc, block_hint);
+       // ksyn_wait drops wait queue lock
+out:
+       ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));
        return error;
 }
 
@@ -1358,28 +1403,20 @@ __psynch_rw_lock(int type,
 * psynch_rw_rdlock: This system call is used for psynch rwlock readers to block.
  */
 int
-_psynch_rw_rdlock(__unused proc_t p,
-                 user_addr_t rwlock,
-                 uint32_t lgenval,
-                 uint32_t ugenval,
-                 uint32_t rw_wc,
-                 int flags,
-                 uint32_t *retval)
+_psynch_rw_rdlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
+               uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
 {
-       return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, flags, retval);
+       return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc,
+                       flags, retval);
 }
 
 /*
 * psynch_rw_longrdlock: This system call is used for psynch rwlock long readers to block.
  */
 int
-_psynch_rw_longrdlock(__unused proc_t p,
-                     __unused user_addr_t rwlock,
-                     __unused uint32_t lgenval,
-                     __unused uint32_t ugenval,
-                     __unused uint32_t rw_wc,
-                     __unused int flags,
-                     __unused uint32_t *retval)
+_psynch_rw_longrdlock(__unused proc_t p, __unused user_addr_t rwlock,
+               __unused uint32_t lgenval, __unused uint32_t ugenval,
+               __unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval)
 {
        return ESRCH;
 }
@@ -1389,28 +1426,20 @@ _psynch_rw_longrdlock(__unused proc_t p,
 * psynch_rw_wrlock: This system call is used for psynch rwlock writers to block.
  */
 int
-_psynch_rw_wrlock(__unused proc_t p,
-                 user_addr_t rwlock,
-                 uint32_t lgenval,
-                 uint32_t ugenval,
-                 uint32_t rw_wc,
-                 int flags,
-                 uint32_t *retval)
+_psynch_rw_wrlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
+               uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
 {
-       return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, rw_wc, flags, retval);
+       return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval,
+                       rw_wc, flags, retval);
 }
 
 /*
 * psynch_rw_yieldwrlock: This system call is used for psynch rwlock yielding writers to block.
  */
 int
-_psynch_rw_yieldwrlock(__unused proc_t p,
-                      __unused user_addr_t rwlock,
-                      __unused uint32_t lgenval,
-                      __unused uint32_t ugenval,
-                      __unused uint32_t rw_wc,
-                      __unused int flags,
-                      __unused uint32_t *retval)
+_psynch_rw_yieldwrlock(__unused proc_t p, __unused user_addr_t rwlock,
+               __unused uint32_t lgenval, __unused uint32_t ugenval,
+               __unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval)
 {
        return ESRCH;
 }
@@ -1420,13 +1449,8 @@ _psynch_rw_yieldwrlock(__unused proc_t p,
  *                     reader/writer variety lock.
  */
 int
-_psynch_rw_unlock(__unused proc_t p,
-                 user_addr_t rwlock,
-                 uint32_t lgenval,
-                 uint32_t ugenval,
-                 uint32_t rw_wc,
-                 int flags,
-                 uint32_t *retval)
+_psynch_rw_unlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
+               uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
 {
        int error = 0;
        ksyn_wait_queue_t kwq;
@@ -1436,7 +1460,8 @@ _psynch_rw_unlock(__unused proc_t p,
        uint32_t curgen = lgenval & PTHRW_COUNT_MASK;
        int clearedkflags = 0;
 
-       error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
+       error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags,
+                       (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
        if (error != 0) {
                return(error);
        }
@@ -1445,7 +1470,8 @@ _psynch_rw_unlock(__unused proc_t p,
        int isinit = _ksyn_check_init(kwq, lgenval);
 
        /* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */
-       if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugenval, kwq->kw_lastunlockseq)!= 0)) {
+       if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) &&
+                       (is_seqlower(ugenval, kwq->kw_lastunlockseq) != 0)) {
                error = 0;
                goto out;
        }
@@ -1466,7 +1492,7 @@ _psynch_rw_unlock(__unused proc_t p,
        
        /* can handle unlock now */
        
-       CLEAR_PREPOST_BITS(kwq);
+       _kwq_clear_preposted_wakeup(kwq);
        
        error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0);
 #if __TESTPANICS__
@@ -1479,26 +1505,25 @@ out:
                *retval = updatebits;
        }
 
-       // <rdar://problem/22244050> If any of the wakeups failed because they already
-       // returned to userspace because of a signal then we need to ensure that the
-       // reset state is not cleared when that thread returns. Otherwise,
+       // <rdar://problem/22244050> If any of the wakeups failed because they
+       // already returned to userspace because of a signal then we need to ensure
+       // that the reset state is not cleared when that thread returns. Otherwise,
        // _pthread_rwlock_lock will clear the interrupted state before it is read.
-       if (clearedkflags != 0 && kwq->kw_pre_intrcount > 0) {
+       if (clearedkflags != 0 && kwq->kw_intr.count > 0) {
                kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
        }
        
        ksyn_wqunlock(kwq);
+       pthread_kern->psynch_wait_cleanup();
        ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));
        
        return(error);
        
 prepost:
        /* update if the new seq is higher than prev prepost, or first set */
-       if (is_rws_setseq(kwq->kw_pre_sseq) ||
-           is_seqhigher_eq(rw_wc, kwq->kw_pre_sseq)) {
-               kwq->kw_pre_rwwc = (diff - count);
-               kwq->kw_pre_lockseq = curgen;
-               kwq->kw_pre_sseq = rw_wc;
+       if (is_rws_sbit_set(kwq->kw_prepost.sseq) ||
+                       is_seqhigher_eq(rw_wc, kwq->kw_prepost.sseq)) {
+               _kwq_mark_preposted_wakeup(kwq, diff - count, curgen, rw_wc);
                updatebits = lgenval;   /* let this not do unlock handling */
        }
        error = 0;
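
The prepost path above only records a new preposted wakeup when rw_wc is at or past the previously saved sseq, via the wraparound-safe is_seqhigher_eq(). A reconstruction of half-range sequence comparison (my own sketch; the real helpers live elsewhere in this project):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Half-range wraparound compares: correct while |a - b| < 2^31. */
    static bool seq_higher(uint32_t a, uint32_t b)    { return (int32_t)(a - b) > 0; }
    static bool seq_higher_eq(uint32_t a, uint32_t b) { return (int32_t)(a - b) >= 0; }

    int main(void)
    {
        /* 0x10 has wrapped past 0xfffffff0, so it still counts as "higher". */
        printf("%d %d\n", seq_higher(0x10, 0xfffffff0), seq_higher_eq(5, 5));
        return 0;
    }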
@@ -1526,13 +1551,9 @@ _pth_proc_hashinit(proc_t p)
 
 
 static int
-ksyn_wq_hash_lookup(user_addr_t uaddr,
-                   proc_t p,
-                   int flags,
-                   ksyn_wait_queue_t *out_kwq,
-                   struct pthhashhead **out_hashptr,
-                   uint64_t *out_object,
-                   uint64_t *out_offset)
+ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags,
+               ksyn_wait_queue_t *out_kwq, struct pthhashhead **out_hashptr,
+               uint64_t *out_object, uint64_t *out_offset)
 {
        int res = 0;
        ksyn_wait_queue_t kwq;
@@ -1593,9 +1614,8 @@ _pth_proc_hashdelete(proc_t p)
                        pthread_list_unlock();
                        /* release fake entries if present for cvars */
                        if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0))
-                               ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]);
-                       lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
-                       pthread_kern->zfree(kwq_zone, kwq);
+                               ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE]);
+                       _kwq_destroy(kwq);
                        pthread_list_lock();
                }
        }
@@ -1611,14 +1631,49 @@ ksyn_freeallkwe(ksyn_queue_t kq)
        while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
                TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
                if (kwe->kwe_state != KWE_THREAD_INWAIT) {
-                       pthread_kern->zfree(kwe_zone, kwe);
+                       zfree(kwe_zone, kwe);
                }
        }
 }
 
+static inline void
+_kwq_report_inuse(ksyn_wait_queue_t kwq)
+{
+       if (kwq->kw_prepost.count != 0) {
+               __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [pre %d:0x%x:0x%x]",
+                               (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_prepost.count,
+                               kwq->kw_prepost.lseq, kwq->kw_prepost.sseq);
+               PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
+                               kwq->kw_type, 1, 0);
+       }
+       if (kwq->kw_intr.count != 0) {
+               __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [intr %d:0x%x:0x%x:0x%x]",
+                               (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_intr.count,
+                               kwq->kw_intr.type, kwq->kw_intr.seq,
+                               kwq->kw_intr.returnbits);
+               PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
+                               kwq->kw_type, 2, 0);
+       }
+       if (kwq->kw_iocount) {
+               __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [ioc %d:%d]",
+                               (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_iocount,
+                               kwq->kw_dropcount);
+               PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
+                               kwq->kw_type, 3, 0);
+       }
+       if (kwq->kw_inqueue) {
+               __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [inq %d:%d]",
+                               (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_inqueue,
+                               kwq->kw_fakecount);
+               PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, kwq->kw_type,
+                               4, 0);
+       }
+}
+
 /* find kernel waitqueue, if not present create one. Grants a reference  */
 int
-ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int flags, int wqtype, ksyn_wait_queue_t *kwqp)
+ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen,
+               int flags, int wqtype, ksyn_wait_queue_t *kwqp)
 {
        int res = 0;
        ksyn_wait_queue_t kwq = NULL;
@@ -1636,7 +1691,8 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int
 
        while (res == 0) {
                pthread_list_lock();
-               res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, &object, &offset);
+               res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr,
+                               &object, &offset);
                if (res != 0) {
                        pthread_list_unlock();
                        break;
@@ -1645,13 +1701,13 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int
                        // Drop the lock to allocate a new kwq and retry.
                        pthread_list_unlock();
 
-                       nkwq = (ksyn_wait_queue_t)pthread_kern->zalloc(kwq_zone);
+                       nkwq = (ksyn_wait_queue_t)zalloc(kwq_zone);
                        bzero(nkwq, sizeof(struct ksyn_wait_queue));
                        int i;
                        for (i = 0; i < KSYN_QUEUE_MAX; i++) {
                                ksyn_queue_init(&nkwq->kw_ksynqueues[i]);
                        }
-                       lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
+                       lck_spin_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
                        continue;
                } else if (kwq == NULL && nkwq != NULL) {
                        // Still not found, add the new kwq to the hash.
@@ -1671,21 +1727,23 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int
                                kwq->kw_pflags &= ~KSYN_WQ_FLIST;
                        }
                        if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) {
-                               if (kwq->kw_inqueue == 0 && kwq->kw_pre_rwwc == 0 && kwq->kw_pre_intrcount == 0) {
+                               if (!_kwq_is_used(kwq)) {
                                        if (kwq->kw_iocount == 0) {
                                                kwq->kw_type = 0; // mark for reinitialization
-                                       } else if (kwq->kw_iocount == 1 && kwq->kw_dropcount == kwq->kw_iocount) {
+                                       } else if (kwq->kw_iocount == 1 &&
+                                                       kwq->kw_dropcount == kwq->kw_iocount) {
                                                /* if all users are unlockers then wait for it to finish */
                                                kwq->kw_pflags |= KSYN_WQ_WAITING;
                                                // Drop the lock and wait for the kwq to be free.
-                                               (void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0);
+                                               (void)msleep(&kwq->kw_pflags, pthread_list_mlock,
+                                                               PDROP, "ksyn_wqfind", 0);
                                                continue;
                                        } else {
-                                               __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
+                                               _kwq_report_inuse(kwq);
                                                res = EINVAL;
                                        }
                                } else {
-                                       __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
+                                       _kwq_report_inuse(kwq);
                                        res = EINVAL;
                                }
                        }
@@ -1700,9 +1758,13 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int
                                kwq->kw_lword = mgen;
                                kwq->kw_uword = ugen;
                                kwq->kw_sword = sgen;
-                               kwq->kw_owner = 0;
+                               kwq->kw_owner = THREAD_NULL;
                                kwq->kw_kflags = 0;
                                kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
+                               PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_START, uaddr,
+                                               kwq->kw_type, kwq, 0);
+                               PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_END, uaddr,
+                                               mgen, ugen, sgen);
                        }
                        kwq->kw_iocount++;
                        if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
@@ -1716,8 +1778,7 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int
                *kwqp = kwq;
        }
        if (nkwq) {
-               lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
-               pthread_kern->zfree(kwq_zone, nkwq);
+               _kwq_destroy(nkwq);
        }
        return res;
 }
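
ksyn_wqfind() keeps its lookup-or-insert loop: search under the list lock; on a miss, drop the lock, allocate a candidate, and loop so the lookup is repeated before inserting. A reduced userland version of that loop over a plain linked list (structure hypothetical):

    #include <pthread.h>
    #include <stdlib.h>
    #include <stdio.h>

    struct node { unsigned long key; struct node *next; };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct node *head;

    static struct node *
    find_or_create(unsigned long key)
    {
        struct node *spare = NULL, *n;

        for (;;) {
            pthread_mutex_lock(&list_lock);
            for (n = head; n != NULL; n = n->next) {
                if (n->key == key) break;
            }
            if (n == NULL && spare == NULL) {
                pthread_mutex_unlock(&list_lock);   /* allocate without the lock */
                spare = malloc(sizeof(*spare));
                spare->key = key;
                continue;                           /* retry: redo the lookup */
            }
            if (n == NULL) {
                spare->next = head;                 /* still missing: insert ours */
                head = spare;
                n = spare;
                spare = NULL;
            }
            pthread_mutex_unlock(&list_lock);
            free(spare);                            /* lost the race: discard */
            return n;
        }
    }

    int main(void) { printf("%lu\n", find_or_create(7)->key); return 0; }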
@@ -1740,7 +1801,16 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
                        wakeup(&kwq->kw_pflags);
                }
                
-               if (kwq->kw_pre_rwwc == 0 && kwq->kw_inqueue == 0 && kwq->kw_pre_intrcount == 0) {
+               if (!_kwq_is_used(kwq)) {
+                       if (kwq->kw_turnstile) {
+                               panic("kw_turnstile still non-null upon release");
+                       }
+
+                       PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_START,
+                                       kwq->kw_addr, kwq->kw_type, qfreenow, 0);
+                       PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_END,
+                                       kwq->kw_addr, kwq->kw_lword, kwq->kw_uword, kwq->kw_sword);
+
                        if (qfreenow == 0) {
                                microuptime(&kwq->kw_ts);
                                LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
@@ -1762,8 +1832,7 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
        }
        pthread_list_unlock();
        if (free_elem != NULL) {
-               lck_mtx_destroy(&free_elem->kw_lock, pthread_lck_grp);
-               pthread_kern->zfree(kwq_zone, free_elem);
+               _kwq_destroy(free_elem);
        }
 }
 
@@ -1771,7 +1840,7 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
 void
 psynch_wq_cleanup(__unused void *param, __unused void * param1)
 {
-       ksyn_wait_queue_t kwq;
+       ksyn_wait_queue_t kwq, tmp;
        struct timeval t;
        int reschedule = 0;
        uint64_t deadline = 0;
@@ -1783,7 +1852,7 @@ psynch_wq_cleanup(__unused void *param, __unused void * param1)
        microuptime(&t);
        
        LIST_FOREACH(kwq, &pth_free_list, kw_list) {
-               if (kwq->kw_iocount != 0 || kwq->kw_pre_rwwc != 0 || kwq->kw_inqueue != 0 || kwq->kw_pre_intrcount != 0) {
+               if (_kwq_is_used(kwq) || kwq->kw_iocount != 0) {
                        // still in use
                        continue;
                }
@@ -1810,10 +1879,8 @@ psynch_wq_cleanup(__unused void *param, __unused void * param1)
        }
        pthread_list_unlock();
 
-       while ((kwq = LIST_FIRST(&freelist)) != NULL) {
-               LIST_REMOVE(kwq, kw_list);
-               lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
-               pthread_kern->zfree(kwq_zone, kwq);
+       LIST_FOREACH_SAFE(kwq, &freelist, kw_list, tmp) {
+               _kwq_destroy(kwq);
        }
 }
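
Freeing while walking the list requires the _SAFE iteration variant, since _kwq_destroy() invalidates the link the plain LIST_FOREACH would read next. A self-contained demo using <sys/queue.h> (the _SAFE macros ship in BSD-derived headers, including macOS; portability elsewhere varies):

    #include <sys/queue.h>
    #include <stdlib.h>
    #include <stdio.h>

    struct item { int v; LIST_ENTRY(item) link; };
    LIST_HEAD(item_list, item);

    int main(void)
    {
        struct item_list freelist = LIST_HEAD_INITIALIZER(freelist);

        for (int i = 0; i < 3; i++) {
            struct item *it = malloc(sizeof(*it));
            it->v = i;
            LIST_INSERT_HEAD(&freelist, it, link);
        }

        /* tmp caches the next pointer before the body frees the element */
        struct item *it, *tmp;
        LIST_FOREACH_SAFE(it, &freelist, link, tmp) {
            printf("freeing %d\n", it->v);
            free(it);
        }
        return 0;
    }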
 
@@ -1833,25 +1900,25 @@ _wait_result_to_errno(wait_result_t result)
 }
 
 int
-ksyn_wait(ksyn_wait_queue_t kwq,
-         int kqi,
-         uint32_t lockseq,
-         int fit,
-         uint64_t abstime,
-         thread_continue_t continuation,
-         block_hint_t block_hint)
+ksyn_wait(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, uint32_t lockseq,
+               int fit, uint64_t abstime, uint16_t kwe_flags,
+               thread_continue_t continuation, block_hint_t block_hint)
 {
-       int res;
-
        thread_t th = current_thread();
        uthread_t uth = pthread_kern->get_bsdthread_info(th);
+       struct turnstile **tstore = NULL;
+       int res;
+
+       assert(continuation != THREAD_CONTINUE_NULL);
+
        ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
        bzero(kwe, sizeof(*kwe));
        kwe->kwe_count = 1;
        kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK;
        kwe->kwe_state = KWE_THREAD_INWAIT;
        kwe->kwe_uth = uth;
-       kwe->kwe_tid = thread_tid(th);
+       kwe->kwe_thread = th;
+       kwe->kwe_flags = kwe_flags;
 
        res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit);
        if (res != 0) {
@@ -1859,43 +1926,39 @@ ksyn_wait(ksyn_wait_queue_t kwq,
                ksyn_wqunlock(kwq);
                return res;
        }
-       
-       thread_set_pending_block_hint(th, block_hint);
-       assert_wait_deadline_with_leeway(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, abstime, 0);
+
+       PTHREAD_TRACE(psynch_mutex_kwqwait, kwq->kw_addr, kwq->kw_inqueue,
+                       kwq->kw_prepost.count, kwq->kw_intr.count);
+
+       if (_kwq_use_turnstile(kwq)) {
+               // pthread mutexes and rwlocks both (at least sometimes) know their
+               // owner and can use turnstiles. Otherwise, we pass NULL as the
+               // tstore to the shims so they wait on the global waitq.
+               tstore = &kwq->kw_turnstile;
+       }
+
+       pthread_kern->psynch_wait_prepare((uintptr_t)kwq, tstore, kwq->kw_owner,
+                       block_hint, abstime);
+
        ksyn_wqunlock(kwq);
-       
-       kern_return_t ret;
-       if (continuation == THREAD_CONTINUE_NULL) {
-               ret = thread_block(NULL);
-       } else {
-               ret = thread_block_parameter(continuation, kwq);
-               
-               // If thread_block_parameter returns (interrupted) call the
-               // continuation manually to clean up.
-               continuation(kwq, ret);
-               
-               // NOT REACHED
-               panic("ksyn_wait continuation returned");
+
+       if (tstore) {
+               pthread_kern->psynch_wait_update_complete(kwq->kw_turnstile);
        }
        
-       res = _wait_result_to_errno(ret);
-       if (res != 0) {
-               ksyn_wqlock(kwq);
-               if (kwe->kwe_kwqqueue) {
-                       ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
-               }
-               ksyn_wqunlock(kwq);
-       }
-       return res;
+       thread_block_parameter(continuation, kwq);
+
+       // NOT REACHED
+       panic("ksyn_wait continuation returned");
+       __builtin_unreachable();
 }
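
ksyn_wait() now always blocks through a continuation: it registers the wait (and the turnstile inheritor) while still holding the kwq lock, unlocks, and only then blocks, so a wakeup cannot slip into the unlock-to-block window. Userspace condition variables enforce the same prepare-under-lock ordering; a minimal sketch:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t c = PTHREAD_COND_INITIALIZER;
    static int ready;

    static void *waiter(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&m);
        while (!ready) {
            /* atomically: release m and block; wakeups can't slip between */
            pthread_cond_wait(&c, &m);
        }
        pthread_mutex_unlock(&m);
        puts("woken");
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, waiter, NULL);
        pthread_mutex_lock(&m);
        ready = 1;
        pthread_cond_signal(&c);
        pthread_mutex_unlock(&m);
        pthread_join(t, NULL);
        return 0;
    }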
 
 kern_return_t
-ksyn_signal(ksyn_wait_queue_t kwq,
-           int kqi,
-           ksyn_waitq_element_t kwe,
-           uint32_t updateval)
+ksyn_signal(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi,
+               ksyn_waitq_element_t kwe, uint32_t updateval)
 {
        kern_return_t ret;
+       struct turnstile **tstore = NULL;
 
        // If no wait element was specified, wake the first.
        if (!kwe) {
@@ -1912,7 +1975,12 @@ ksyn_signal(ksyn_wait_queue_t kwq,
        ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
        kwe->kwe_psynchretval = updateval;
 
-       ret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval);
+       if (_kwq_use_turnstile(kwq)) {
+               tstore = &kwq->kw_turnstile;
+       }
+
+       ret = pthread_kern->psynch_wait_wakeup(kwq, kwe, tstore);
+
        if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) {
                panic("ksyn_signal: panic waking up thread %x\n", ret);
        }
@@ -1925,7 +1993,8 @@ ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
        kern_return_t ret;
        vm_page_info_basic_data_t info;
        mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
-       ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
+       ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr,
+                       VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
        if (ret != KERN_SUCCESS) {
                return EINVAL;
        }
@@ -1943,20 +2012,22 @@ ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
 
 /* lowest of kw_fr, kw_flr, kw_fwr, kw_fywr */
 int
-kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *typep, uint32_t lowest[])
+kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen,
+               int *typep, uint32_t lowest[])
 {
        uint32_t kw_fr, kw_fwr, low;
        int type = 0, lowtype, typenum[2] = { 0 };
        uint32_t numbers[2] = { 0 };
        int count = 0, i;
        
-       
-       if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
+       if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) ||
+                       ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
                type |= PTH_RWSHFT_TYPE_READ;
                /* read entries are present */
                if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) {
                        kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum;
-                       if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0))
+                       if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) &&
+                                       (is_seqlower(premgen, kw_fr) != 0))
                                kw_fr = premgen;
                } else
                        kw_fr = premgen;
@@ -1968,22 +2039,24 @@ kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type
        } else
                lowest[KSYN_QUEUE_READ] = 0;
        
-       if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
+       if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) ||
+                       ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
                type |= PTH_RWSHFT_TYPE_WRITE;
                /* write entries are present */
-               if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
-                       kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum;
-                       if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0))
+               if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) {
+                       kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum;
+                       if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) &&
+                                       (is_seqlower(premgen, kw_fwr) != 0))
                                kw_fwr = premgen;
                } else
                        kw_fwr = premgen;
                
-               lowest[KSYN_QUEUE_WRITER] = kw_fwr;
+               lowest[KSYN_QUEUE_WRITE] = kw_fwr;
                numbers[count]= kw_fwr;
                typenum[count] = PTH_RW_TYPE_WRITE;
                count++;
        } else
-               lowest[KSYN_QUEUE_WRITER] = 0;
+               lowest[KSYN_QUEUE_WRITE] = 0;
        
 #if __TESTPANICS__
        if (count == 0)
@@ -2009,7 +2082,8 @@ kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type
 
 /* wake up readers up to the writer limits */
 int
-ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp)
+ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders,
+               uint32_t updatebits, int *wokenp)
 {
        ksyn_queue_t kq;
        int failedwakeup = 0;
@@ -2020,7 +2094,8 @@ ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, ui
        lbits = updatebits;
        
        kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
-       while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
+       while ((kq->ksynq_count != 0) &&
+                       (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
                kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits);
                if (kret == KERN_NOT_WAITING) {
                        failedwakeup++;
@@ -2034,19 +2109,17 @@ ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, ui
 }
 
 
-/* This handles the unlock grants for next set on rw_unlock() or on arrival of all preposted waiters */
+/*
+ * This handles the unlock grants for the next set on rw_unlock() or on
+ * arrival of all preposted waiters.
+ */
 int
-kwq_handle_unlock(ksyn_wait_queue_t kwq,
-                 __unused uint32_t mgen,
-                 uint32_t rw_wc,
-                 uint32_t *updatep,
-                 int flags,
-                 int *blockp,
-                 uint32_t premgen)
+kwq_handle_unlock(ksyn_wait_queue_t kwq, __unused uint32_t mgen, uint32_t rw_wc,
+               uint32_t *updatep, int flags, int *blockp, uint32_t premgen)
 {
        uint32_t low_writer, limitrdnum;
        int rwtype, error=0;
-       int allreaders, failed;
+       int allreaders, nfailed;
        uint32_t updatebits=0, numneeded = 0;
        int prepost = flags & KW_UNLOCK_PREPOST;
        thread_t preth = THREAD_NULL;
@@ -2067,7 +2140,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq,
        kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
        kwq->kw_lastseqword = rw_wc;
        kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
-       kwq->kw_overlapwatch = 0;
+       kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD;
        
        error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
 #if __TESTPANICS__
@@ -2075,7 +2148,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq,
                panic("rwunlock: cannot fails to slot next round of threads");
 #endif /* __TESTPANICS__ */
        
-       low_writer = lowest[KSYN_QUEUE_WRITER];
+       low_writer = lowest[KSYN_QUEUE_WRITE];
        
        allreaders = 0;
        updatebits = 0;
@@ -2108,7 +2181,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq,
                        } else {
                                // no writers at all
                                // no other waiters only readers
-                               kwq->kw_overlapwatch = 1;
+                               kwq->kw_kflags |= KSYN_KWF_OVERLAP_GUARD;
                                numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
                                if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) {
                                        curthreturns = 1;
@@ -2128,18 +2201,19 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq,
                        }
                        
                        
-                       failed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, updatebits, &woken);
-                       if (failed != 0) {
-                               kwq->kw_pre_intrcount = failed; /* actually a count */
-                               kwq->kw_pre_intrseq = limitrdnum;
-                               kwq->kw_pre_intrretbits = updatebits;
-                               kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
+                       nfailed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders,
+                                       updatebits, &woken);
+                       if (nfailed != 0) {
+                               _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_READ, nfailed,
+                                               limitrdnum, updatebits);
                        }
                        
                        error = 0;
                        
-                       if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0))
+                       if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) && 
+                                       ((updatebits & PTH_RWL_WBIT) == 0)) {
                                panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
+                       }
                }
                        break;
                        
@@ -2151,7 +2225,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq,
                        
                        if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
                                block = 0;
-                               if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
+                               if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) {
                                        updatebits |= PTH_RWL_WBIT;
                                }
                                th = preth;
@@ -2161,23 +2235,23 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq,
                        } else {
                                /* we are not granting writelock to the preposting thread */
                                /* if there are writers present or a preposting write thread, the W bit must be set */
-                               if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 1 ||
+                               if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count > 1 ||
                                    (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) {
                                        updatebits |= PTH_RWL_WBIT;
                                }
                                /* setup next in the queue */
-                               kret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, NULL, updatebits);
+                               kret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, NULL, updatebits);
                                if (kret == KERN_NOT_WAITING) {
-                                       kwq->kw_pre_intrcount = 1;      /* actually a count */
-                                       kwq->kw_pre_intrseq = low_writer;
-                                       kwq->kw_pre_intrretbits = updatebits;
-                                       kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
+                                       _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1,
+                                                       low_writer, updatebits);
                                }
                                error = 0;
                        }
                        kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
-                       if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT))
+                       if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != 
+                                       (PTH_RWL_KBIT | PTH_RWL_EBIT)) {
                                panic("kwq_handle_unlock: writer lock granted but no ke set %x\n", updatebits);
+                       }
                }
                        break;
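
Both grant paths above record failed wakeups through _kwq_mark_interruped_wakeup()
instead of four separate kw_pre_intr* stores. A minimal sketch of that
consolidation, assuming an illustrative struct layout (the names below are
hypothetical, not the file's):

    #include <stdint.h>

    typedef enum { INTR_NONE, INTR_READ, INTR_WRITE } intr_type_t;

    struct intr_state {
            intr_type_t type;       /* reader or writer wakeup that misfired */
            uint32_t    count;      /* wakeups that found no waiting thread */
            uint32_t    seq;        /* sequence the wakeup targeted */
            uint32_t    returnbits; /* updatebits a late waiter should consume */
    };

    static inline void
    intr_mark(struct intr_state *st, intr_type_t type, uint32_t count,
                    uint32_t seq, uint32_t returnbits)
    {
            st->type = type;
            st->count = count;
            st->seq = seq;
            st->returnbits = returnbits;
    }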
                        
@@ -2204,7 +2278,8 @@ ksyn_queue_init(ksyn_queue_t kq)
 }
 
 int
-ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int fit)
+ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe,
+               uint32_t mgen, int fit)
 {
        ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
        uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
@@ -2229,11 +2304,13 @@ ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint
                        kq->ksynq_lastnum = lockseq;
                }
        } else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) {
-               /* During prepost when a thread is getting cancelled, we could have two with same seq */
+               /* During prepost when a thread is getting cancelled, we could have
+                * two entries with the same seq */
                res = EBUSY;
                if (kwe->kwe_state == KWE_THREAD_PREPOST) {
                        ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq);
-                       if (tmp != NULL && tmp->kwe_uth != NULL && pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
+                       if (tmp != NULL && tmp->kwe_uth != NULL &&
+                                       pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
                                TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
                                res = 0;
                        }
@@ -2267,7 +2344,8 @@ ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint
 }
 
 void
-ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe)
+ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq,
+               ksyn_waitq_element_t kwe)
 {
        if (kq->ksynq_count == 0) {
                panic("removing item from empty queue");
@@ -2308,7 +2386,8 @@ ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_elemen
 }
 
 ksyn_waitq_element_t
-ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq)
+ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq,
+               uint32_t seq)
 {
        ksyn_waitq_element_t kwe;
        
@@ -2334,7 +2413,8 @@ ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
                        result = kwe;
                        
                        // KWE_THREAD_INWAIT must be strictly equal
-                       if (kwe->kwe_state == KWE_THREAD_INWAIT && (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
+                       if (kwe->kwe_state == KWE_THREAD_INWAIT &&
+                                       (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
                                result = NULL;
                        }
                        break;
@@ -2345,7 +2425,8 @@ ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
 
 /* look for a thread at lockseq */
 ksyn_waitq_element_t
-ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq)
+ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq,
+               uint32_t uptoseq, uint32_t signalseq)
 {
        ksyn_waitq_element_t result = NULL;
        ksyn_waitq_element_t q_kwe, r_kwe;
@@ -2358,7 +2439,8 @@ ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint3
                                return result;
                        }
                }
-               if (q_kwe->kwe_state == KWE_THREAD_PREPOST || q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
+               if (q_kwe->kwe_state == KWE_THREAD_PREPOST ||
+                               q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
                        /* match any prepost at our same uptoseq or any broadcast above */
                        if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) {
                                continue;
@@ -2399,6 +2481,10 @@ ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
        ksyn_waitq_element_t kwe;
        uint32_t tseq = upto & PTHRW_COUNT_MASK;
        ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
+       uint32_t freed = 0, signaled = 0;
+
+       PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_START, kwq->kw_addr,
+                       kqi, upto, all);
        
        while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
                if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) {
@@ -2411,17 +2497,28 @@ ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
                         * return them as spurious wait so the cvar state gets
                         * reset correctly.
                         */
+
+                       PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe,
+                                       kwq->kw_inqueue, 1);
                        
                        /* skip canceled ones */
                        /* wake the rest */
                        /* set M bit to indicate to waking CV to return Inc val */
-                       (void)ksyn_signal(kwq, kqi, kwe, PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
+                       (void)ksyn_signal(kwq, kqi, kwe,
+                                       PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
+                       signaled++;
                } else {
+                       PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe,
+                                       kwq->kw_inqueue, 2);
                        ksyn_queue_remove_item(kwq, kq, kwe);
-                       pthread_kern->zfree(kwe_zone, kwe);
+                       zfree(kwe_zone, kwe);
                        kwq->kw_fakecount--;
+                       freed++;
                }
        }
+
+       PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_END, kwq->kw_addr, freed,
+                       signaled, kwq->kw_inqueue);
 }
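
ksyn_queue_free_items is now bracketed by DBG_FUNC_START/DBG_FUNC_END events,
which trace tooling pairs into an interval, with one plain event per item in
between. A small user-space model of how the begin/end flags ride in the low
bits of the debugid (the code value below is an arbitrary example, not a real
pthread trace code):

    #include <stdio.h>
    #include <stdint.h>

    #define DBG_FUNC_NONE  0u
    #define DBG_FUNC_START 1u
    #define DBG_FUNC_END   2u

    static void
    emit(uint32_t debugid, uint32_t a, uint32_t b)
    {
            /* Low two bits carry start/end; the rest identifies the event. */
            printf("code=0x%x start=%u end=%u a=%u b=%u\n", debugid & ~3u,
                debugid & DBG_FUNC_START, (debugid & DBG_FUNC_END) >> 1, a, b);
    }

    int main(void)
    {
            uint32_t code = 0x9310008u;          /* arbitrary example code */
            emit(code | DBG_FUNC_START, 1, 2);   /* interval begins */
            emit(code | DBG_FUNC_NONE, 3, 4);    /* event inside interval */
            emit(code | DBG_FUNC_END, 5, 6);     /* interval ends */
            return 0;
    }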
 
 /*************************************************************************/
@@ -2483,7 +2580,8 @@ find_nexthighseq(ksyn_wait_queue_t kwq)
 }
 
 int
-find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp)
+find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters,
+               uint32_t *countp)
 {
        int i;
        uint32_t count = 0;
@@ -2540,10 +2638,13 @@ ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep)
 {
        ksyn_waitq_element_t kwe, newkwe;
        uint32_t updatebits = 0;
-       ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
+       ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];
        
        struct ksyn_queue kfreeq;
        ksyn_queue_init(&kfreeq);
+
+       PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_START, ckwq->kw_addr, upto,
+                       ckwq->kw_inqueue, 0);
        
 retry:
        TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
@@ -2555,11 +2656,14 @@ retry:
                if (kwe->kwe_state == KWE_THREAD_INWAIT) {
                        // Wake only non-canceled threads waiting on this CV.
                        if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) {
-                               (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
+                               PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe, 0, 1);
+                               (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT);
                                updatebits += PTHRW_INC;
                        }
                } else if (kwe->kwe_state == KWE_THREAD_BROADCAST ||
                           kwe->kwe_state == KWE_THREAD_PREPOST) {
+                       PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe,
+                                       kwe->kwe_state, 2);
                        ksyn_queue_remove_item(ckwq, kq, kwe);
                        TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
                        ckwq->kw_fakecount--;
@@ -2571,27 +2675,34 @@ retry:
        /* Need to enter a broadcast in the queue (if not already at L == S) */
        
        if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) {
+               PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, ckwq->kw_lword,
+                               ckwq->kw_sword, 3);
+
                newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
                if (newkwe == NULL) {
                        ksyn_wqunlock(ckwq);
-                       newkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
+                       newkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
                        TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
                        ksyn_wqlock(ckwq);
                        goto retry;
                } else {
                        TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
                        ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto);
+                       PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, newkwe, 0, 4);
                }
        }
        
        // free up any remaining things stumbled across above
        while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) {
                TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
-               pthread_kern->zfree(kwe_zone, kwe);
+               zfree(kwe_zone, kwe);
        }
+
+       PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_END, ckwq->kw_addr,
+                       updatebits, 0, 0);
        
        if (updatep != NULL) {
-               *updatep = updatebits;
+               *updatep |= updatebits;
        }
 }
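
The retry: loop above cannot call zalloc() while holding the kwq lock, so when
it needs a broadcast element it drops the lock, allocates into the local free
queue, retakes the lock, and rescans, because waiters may have arrived or left
in between. A generic sketch of that discipline with hypothetical ex_* names:

    #include <stdbool.h>

    typedef struct ex_queue *ex_queue_t;
    typedef struct ex_elem *ex_elem_t;

    extern void ex_lock(ex_queue_t q);
    extern void ex_unlock(ex_queue_t q);
    extern void ex_scan_and_wake(ex_queue_t q);   /* may consume cached elems */
    extern bool ex_needs_element(ex_queue_t q);
    extern ex_elem_t ex_pop_cached(void);
    extern void ex_push_cached(ex_elem_t e);
    extern void ex_install(ex_queue_t q, ex_elem_t e);
    extern ex_elem_t ex_alloc_may_block(void);

    static void
    ex_broadcast(ex_queue_t q)
    {
            for (;;) {
                    ex_lock(q);
                    ex_scan_and_wake(q);
                    if (!ex_needs_element(q)) {
                            break;                  /* done; still locked */
                    }
                    ex_elem_t e = ex_pop_cached();
                    if (e != NULL) {
                            ex_install(q, e);       /* consume under the lock */
                            break;
                    }
                    ex_unlock(q);                   /* never allocate locked */
                    ex_push_cached(ex_alloc_may_block());
                    /* loop: retake the lock and rescan from scratch */
            }
            ex_unlock(q);
    }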
 
@@ -2601,7 +2712,7 @@ ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
        if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
                if (ckwq->kw_inqueue != 0) {
                        /* FREE THE QUEUE */
-                       ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 0);
+                       ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 0);
 #if __TESTPANICS__
                        if (ckwq->kw_inqueue != 0)
                                panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
@@ -2619,8 +2730,10 @@ ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
 void
 psynch_zoneinit(void)
 {
-       kwq_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
-       kwe_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
+       kwq_zone = zinit(sizeof(struct ksyn_wait_queue),
+                       8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
+       kwe_zone = zinit(sizeof(struct ksyn_waitq_element),
+                       8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
 }
 
 void *
@@ -2641,13 +2754,14 @@ _pthread_get_thread_kwq(thread_t thread)
  * to pthread sync objects.
  */
 void
-_pthread_find_owner(thread_t thread, struct stackshot_thread_waitinfo * waitinfo)
+_pthread_find_owner(thread_t thread,
+               struct stackshot_thread_waitinfo * waitinfo)
 {
        ksyn_wait_queue_t kwq = _pthread_get_thread_kwq(thread);
        switch (waitinfo->wait_type) {
                case kThreadWaitPThreadMutex:
                        assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX);
-                       waitinfo->owner   = kwq->kw_owner;
+                       waitinfo->owner   = thread_tid(kwq->kw_owner);
                        waitinfo->context = kwq->kw_addr;
                        break;
                /* Owner of rwlock not stored in kernel space due to races. Punt
diff --git a/kern/kern_trace.h b/kern/kern_trace.h
index e65e7b94d132895e92daa8636cb904fba760f6b9..2e59edc5a818acab9444f362aeee3449f35018a1 100644
@@ -39,7 +39,9 @@
 // pthread tracing subclasses
 # define _TRACE_SUB_DEFAULT 0
 # define _TRACE_SUB_WORKQUEUE 1
-# define _TRACE_SUB_MUTEX 2
+// WQ_TRACE_REQUESTS_SUBCLASS is 2 in xnu
+# define _TRACE_SUB_MUTEX 3
+# define _TRACE_SUB_CONDVAR 4
 
 #ifndef _PTHREAD_BUILDING_CODES_
 
@@ -62,14 +64,14 @@ VM_UNSLIDE(void* ptr)
     return (void*)unslid_ptr;
 }
 
-# define PTHREAD_TRACE(x,a,b,c,d,e) \
-       { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(x, a, b, c, d, e); } }
+# define PTHREAD_TRACE(x,a,b,c,d) \
+       { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, a, b, c, d, 0); } }
 
-# define PTHREAD_TRACE_WQ(x,a,b,c,d,e) \
-       { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(x, VM_UNSLIDE(a), b, c, d, e); } }
+# define PTHREAD_TRACE_WQ(x,a,b,c,d) \
+       { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, VM_UNSLIDE(a), b, c, d, 0); } }
 
 # define PTHREAD_TRACE_WQ_REQ(x,a,b,c,d,e) \
-       { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(x, VM_UNSLIDE(a), VM_UNSLIDE(b), c, d, e); } }
+       { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, VM_UNSLIDE(a), VM_UNSLIDE(b), c, d, e); } }
 
 #else // KERNEL
 
@@ -138,5 +140,25 @@ TRACE_CODE(psynch_mutex_uunlock, _TRACE_SUB_MUTEX, 0x2);
 TRACE_CODE(psynch_ksyn_incorrect_owner, _TRACE_SUB_MUTEX, 0x3);
 TRACE_CODE(psynch_mutex_lock_updatebits, _TRACE_SUB_MUTEX, 0x4);
 TRACE_CODE(psynch_mutex_unlock_updatebits, _TRACE_SUB_MUTEX, 0x5);
+TRACE_CODE(psynch_mutex_clearprepost, _TRACE_SUB_MUTEX, 0x6);
+TRACE_CODE(psynch_mutex_kwqallocate, _TRACE_SUB_MUTEX, 0x7);
+TRACE_CODE(psynch_mutex_kwqdeallocate, _TRACE_SUB_MUTEX, 0x8);
+TRACE_CODE(psynch_mutex_kwqprepost, _TRACE_SUB_MUTEX, 0x9);
+TRACE_CODE(psynch_mutex_markprepost, _TRACE_SUB_MUTEX, 0x10);
+TRACE_CODE(psynch_mutex_kwqcollision, _TRACE_SUB_MUTEX, 0x11);
+TRACE_CODE(psynch_ffmutex_lock_updatebits, _TRACE_SUB_MUTEX, 0x12);
+TRACE_CODE(psynch_ffmutex_unlock_updatebits, _TRACE_SUB_MUTEX, 0x13);
+TRACE_CODE(psynch_ffmutex_wake, _TRACE_SUB_MUTEX, 0x14);
+TRACE_CODE(psynch_mutex_kwqsignal, _TRACE_SUB_MUTEX, 0x15);
+TRACE_CODE(psynch_ffmutex_wait, _TRACE_SUB_MUTEX, 0x16);
+TRACE_CODE(psynch_mutex_kwqwait, _TRACE_SUB_MUTEX, 0x17);
+
+TRACE_CODE(psynch_cvar_kwait, _TRACE_SUB_CONDVAR, 0x0);
+TRACE_CODE(psynch_cvar_clrprepost, _TRACE_SUB_CONDVAR, 0x1);
+TRACE_CODE(psynch_cvar_freeitems, _TRACE_SUB_CONDVAR, 0x2);
+TRACE_CODE(psynch_cvar_signal, _TRACE_SUB_CONDVAR, 0x3);
+TRACE_CODE(psynch_cvar_broadcast, _TRACE_SUB_CONDVAR, 0x5);
+TRACE_CODE(psynch_cvar_zeroed, _TRACE_SUB_CONDVAR, 0x6);
+TRACE_CODE(psynch_cvar_updateval, _TRACE_SUB_CONDVAR, 0x7);
 
 #endif // _KERN_TRACE_H_
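
The reworked PTHREAD_TRACE macros take four arguments and paste the TRACE_
prefix onto the code name, so call sites write
PTHREAD_TRACE(psynch_cvar_signal, ...) rather than spelling out the debugid.
A runnable user-space model of that token pasting (DEMO_TRACE and demo_event
are made-up names; the real macro checks pthread_debug_tracing and calls
KERNEL_DEBUG_CONSTANT):

    #include <stdio.h>

    #define TRACE_demo_event 0x1234u

    #define DEMO_TRACE(x, a, b, c, d) \
            printf("code=0x%x args=%u %u %u %u\n", TRACE_##x, a, b, c, d)

    int main(void)
    {
            DEMO_TRACE(demo_event, 1u, 2u, 3u, 4u); /* pastes TRACE_demo_event */
            return 0;
    }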
diff --git a/kern/synch_internal.h b/kern/synch_internal.h
index 6b22c412b7396dd035b46cfbd8997ff499710103..1b9d6c2359e00b2d49c4ed7eda170403b50a93a3 100644
 #ifndef __SYNCH_INTERNAL_H__
 #define __SYNCH_INTERNAL_H__
 
+// kwe_state
+enum {
+       KWE_THREAD_INWAIT = 1,
+       KWE_THREAD_PREPOST,
+       KWE_THREAD_BROADCAST,
+};
 
 #define _PTHREAD_MTX_OPT_PSHARED 0x010
 #define _PTHREAD_MTX_OPT_NOTIFY 0x1000 /* notify to drop mutex handling in cvwait */
 #define is_rwl_readoverlap(x) (((x) & PTH_RWL_MBIT) != 0)
 
 // S word tests
-#define is_rws_setseq(x) (((x) & PTH_RWS_SBIT))
-#define is_rws_setunlockinit(x) (((x) & PTH_RWS_IBIT))
+#define is_rws_sbit_set(x) (((x) & PTH_RWS_SBIT) != 0)
+#define is_rws_unlockinit_set(x) (((x) & PTH_RWS_IBIT) != 0)
+#define is_rws_savemask_set(x) (((x) & PTHRW_RWS_SAVEMASK) != 0)
+#define is_rws_pbit_set(x) (((x) & PTH_RWS_CV_PBIT) != 0)
+
+// kwe_flags
+#define KWE_FLAG_LOCKPREPOST   0x1 // cvwait caused a lock prepost
 
 static inline int
 is_seqlower(uint32_t x, uint32_t y)
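
is_seqlower() and the other seq predicates compare 32-bit generation words that
wrap around, so ordering must be computed from the modular distance rather than
with a plain less-than. A generic wraparound-safe sketch (illustrative only;
the kernel's helpers operate on masked PTHRW count values):

    #include <stdbool.h>
    #include <stdint.h>

    /* x precedes y iff the forward distance x -> y is in the lower half-range. */
    static inline bool
    seq_precedes(uint32_t x, uint32_t y)
    {
            return x != y && (y - x) < 0x80000000u;
    }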
diff --git a/kern/workqueue_internal.h b/kern/workqueue_internal.h
index 28d870e529aba81f885c27274791e25de43fa346..c044fe763754faaef4c45d74e8a7d01423649b05 100644
  * duplicate definitions that used to exist in both projects, when separate.
  */
 
-/* workq_kernreturn commands */
-#define WQOPS_THREAD_RETURN        0x04        /* parks the thread back into the kernel */
-#define WQOPS_QUEUE_NEWSPISUPP     0x10        /* this is to check for newer SPI support */
-#define WQOPS_QUEUE_REQTHREADS     0x20        /* request number of threads of a prio */
-#define WQOPS_QUEUE_REQTHREADS2    0x30        /* request a number of threads in a given priority bucket */
-#define WQOPS_THREAD_KEVENT_RETURN 0x40        /* parks the thread after delivering the passed kevent array */
-#define WQOPS_SET_EVENT_MANAGER_PRIORITY 0x80  /* max() in the provided priority in the the priority of the event manager */
-#define WQOPS_THREAD_WORKLOOP_RETURN 0x100     /* parks the thread after delivering the passed kevent array */
-#define WQOPS_SHOULD_NARROW 0x200      /* checks whether we should narrow our concurrency */
-
-/* flag values for upcall flags field, only 8 bits per struct threadlist */
-#define        WQ_FLAG_THREAD_PRIOMASK                 0x0000ffff
-#define WQ_FLAG_THREAD_PRIOSHIFT               16
-#define        WQ_FLAG_THREAD_OVERCOMMIT               0x00010000      /* thread is with overcommit prio */
-#define        WQ_FLAG_THREAD_REUSE                    0x00020000      /* thread is being reused */
-#define        WQ_FLAG_THREAD_NEWSPI                   0x00040000      /* the call is with new SPIs */
-#define WQ_FLAG_THREAD_KEVENT                  0x00080000  /* thread is response to kevent req */
-#define WQ_FLAG_THREAD_EVENT_MANAGER   0x00100000  /* event manager thread */
-#define WQ_FLAG_THREAD_TSD_BASE_SET            0x00200000  /* tsd base has already been set */
-#define WQ_FLAG_THREAD_WORKLOOP                        0x00400000  /* workloop thread */
-
-#define WQ_THREAD_CLEANUP_QOS QOS_CLASS_DEFAULT
-
-#define WQ_KEVENT_LIST_LEN  16 // WORKQ_KEVENT_EVENT_BUFFER_LEN
-#define WQ_KEVENT_DATA_SIZE (32 * 1024)
-
-/* These definitions are only available to the kext, to avoid bleeding constants and types across the boundary to
- * the userspace library.
- */
-#ifdef KERNEL
-
-/* These defines come from kern/thread.h but are XNU_KERNEL_PRIVATE so do not get
- * exported to kernel extensions.
- */
-#define SCHED_CALL_BLOCK 0x1
-#define SCHED_CALL_UNBLOCK 0x2
-
-// kwe_state
-enum {
-       KWE_THREAD_INWAIT = 1,
-       KWE_THREAD_PREPOST,
-       KWE_THREAD_BROADCAST,
-};
-
-/* old workq priority scheme */
-
-#define WORKQUEUE_HIGH_PRIOQUEUE    0       /* high priority queue */
-#define WORKQUEUE_DEFAULT_PRIOQUEUE 1       /* default priority queue */
-#define WORKQUEUE_LOW_PRIOQUEUE     2       /* low priority queue */
-#define WORKQUEUE_BG_PRIOQUEUE      3       /* background priority queue */
-
-#define WORKQUEUE_NUM_BUCKETS 7
-
 // Sometimes something gets passed a bucket number and we need a way to express
-// that it's actually the event manager.  Use the (n+1)th bucket for that.
-#define WORKQUEUE_EVENT_MANAGER_BUCKET (WORKQUEUE_NUM_BUCKETS-1)
-
-/* wq_max_constrained_threads = max(64, N_CPU * WORKQUEUE_CONSTRAINED_FACTOR)
- * This used to be WORKQUEUE_NUM_BUCKETS + 1 when NUM_BUCKETS was 4, yielding
- * N_CPU * 5. When NUM_BUCKETS changed, we decided that the limit should
- * not change. So the factor is now always 5.
- */
-#define WORKQUEUE_CONSTRAINED_FACTOR 5
-
-#define WORKQUEUE_OVERCOMMIT   0x10000
-
-/*
- * A thread which is scheduled may read its own th_priority field without
- * taking the workqueue lock.  Other fields should be assumed to require the
- * lock.
- */
-struct threadlist {
-       TAILQ_ENTRY(threadlist) th_entry;
-       thread_t th_thread;
-       struct workqueue *th_workq;
-       mach_vm_offset_t th_stackaddr;
-       mach_port_name_t th_thport;
-       uint16_t th_flags;
-       uint8_t th_upcall_flags;
-       uint8_t th_priority;
-};
-
-#define TH_LIST_INITED         0x0001 /* Set at thread creation. */
-#define TH_LIST_RUNNING                0x0002 /* On thrunlist, not parked. */
-#define TH_LIST_KEVENT         0x0004 /* Thread requested by kevent */
-#define TH_LIST_NEW            0x0008 /* First return to userspace */
-#define TH_LIST_BUSY           0x0010 /* Removed from idle list but not ready yet. */
-#define TH_LIST_KEVENT_BOUND   0x0020 /* Thread bound to kqueues */
-#define TH_LIST_CONSTRAINED    0x0040 /* Non-overcommit thread. */
-#define TH_LIST_EVENT_MGR_SCHED_PRI    0x0080 /* Non-QoS Event Manager */
-#define TH_LIST_UNBINDING      0x0100 /* Thread is unbinding during park */
-#define TH_LIST_REMOVING_VOUCHER       0x0200 /* Thread is removing its voucher */
-#define TH_LIST_PACING         0x0400 /* Thread is participating in pacing */
-
-struct threadreq {
-       TAILQ_ENTRY(threadreq) tr_entry;
-       uint16_t tr_flags;
-       uint8_t tr_state;
-       uint8_t tr_priority;
-};
-TAILQ_HEAD(threadreq_head, threadreq);
-
-#define TR_STATE_NEW           0 /* Not yet enqueued */
-#define TR_STATE_WAITING       1 /* Waiting to be serviced - on reqlist */
-#define TR_STATE_COMPLETE      2 /* Request handled - for caller to free */
-#define TR_STATE_DEAD          3
-
-#define TR_FLAG_KEVENT         0x01
-#define TR_FLAG_OVERCOMMIT     0x02
-#define TR_FLAG_ONSTACK                0x04
-#define TR_FLAG_WORKLOOP       0x08
-#define TR_FLAG_NO_PACING      0x10
-
-#if defined(__LP64__)
-typedef unsigned __int128 wq_thactive_t;
-#else
-typedef uint64_t wq_thactive_t;
-#endif
-
-struct workqueue {
-       proc_t          wq_proc;
-       vm_map_t        wq_map;
-       task_t          wq_task;
-
-       lck_spin_t      wq_lock;
-
-       thread_call_t   wq_atimer_delayed_call;
-       thread_call_t   wq_atimer_immediate_call;
-
-       uint32_t _Atomic wq_flags;
-       uint32_t        wq_timer_interval;
-       uint32_t        wq_threads_scheduled;
-       uint32_t        wq_constrained_threads_scheduled;
-       uint32_t        wq_nthreads;
-       uint32_t        wq_thidlecount;
-       uint32_t        wq_event_manager_priority;
-       uint8_t         wq_lflags; // protected by wqueue lock
-       uint8_t         wq_paced; // protected by wqueue lock
-       uint16_t    __wq_unused;
-
-       TAILQ_HEAD(, threadlist) wq_thrunlist;
-       TAILQ_HEAD(, threadlist) wq_thidlelist;
-       TAILQ_HEAD(, threadlist) wq_thidlemgrlist;
-
-       uint32_t        wq_reqcount;    /* number of elements on the following lists */
-       struct threadreq_head wq_overcommit_reqlist[WORKQUEUE_EVENT_MANAGER_BUCKET];
-       struct threadreq_head wq_reqlist[WORKQUEUE_EVENT_MANAGER_BUCKET];
-       struct threadreq wq_event_manager_threadreq;
-
-       struct threadreq *wq_cached_threadreq;
-
-       uint16_t        wq_thscheduled_count[WORKQUEUE_NUM_BUCKETS];
-       _Atomic wq_thactive_t wq_thactive;
-       _Atomic uint64_t wq_lastblocked_ts[WORKQUEUE_NUM_BUCKETS];
-};
-#define WQ_EXITING             0x01
-#define WQ_ATIMER_DELAYED_RUNNING      0x02
-#define WQ_ATIMER_IMMEDIATE_RUNNING    0x04
-
-#define WQL_ATIMER_BUSY                0x01
-#define WQL_ATIMER_WAITING     0x02
-
-#define WORKQUEUE_MAXTHREADS           512
-#define WQ_STALLED_WINDOW_USECS                200
-#define WQ_REDUCE_POOL_WINDOW_USECS    5000000
-#define        WQ_MAX_TIMER_INTERVAL_USECS     50000
-
-#define WQ_THREADLIST_EXITING_POISON (void *)~0ul
-
-#endif // KERNEL
+// that it's actually the event manager.  Use the (0)th bucket for that.
+#define WORKQ_THREAD_QOS_MIN        (THREAD_QOS_MAINTENANCE)
+#define WORKQ_THREAD_QOS_MAX        (THREAD_QOS_LAST - 1)
+#define WORKQ_THREAD_QOS_CLEANUP    (THREAD_QOS_LEGACY)
+#define WORKQ_THREAD_QOS_MANAGER    (THREAD_QOS_LAST) // outside of MIN/MAX
+
+#define WORKQ_NUM_QOS_BUCKETS       (WORKQ_THREAD_QOS_MAX)
+#define WORKQ_NUM_BUCKETS           (WORKQ_THREAD_QOS_MAX + 1)
+#define WORKQ_IDX(qos)              ((qos) - 1) // 0 based index
+
+// magical `nkevents` values for _pthread_wqthread
+#define WORKQ_EXIT_THREAD_NKEVENT   (-1)
 
 #endif // _WORKQUEUE_INTERNAL_H_
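
The new QoS-indexed bucket macros map XNU's 1-based THREAD_QOS_* values
(MAINTENANCE == 1 through USER_INTERACTIVE == 6, THREAD_QOS_LAST == 7) onto
0-based buckets, with the manager kept outside the MIN/MAX range. A small
user-space check of that arithmetic (the THREAD_QOS_* defines below restate
the XNU values so the example stands alone):

    #include <assert.h>

    #define THREAD_QOS_MAINTENANCE 1
    #define THREAD_QOS_LAST        7

    #define WORKQ_THREAD_QOS_MIN   (THREAD_QOS_MAINTENANCE)
    #define WORKQ_THREAD_QOS_MAX   (THREAD_QOS_LAST - 1)
    #define WORKQ_NUM_QOS_BUCKETS  (WORKQ_THREAD_QOS_MAX)
    #define WORKQ_IDX(qos)         ((qos) - 1)

    int main(void)
    {
            assert(WORKQ_IDX(WORKQ_THREAD_QOS_MIN) == 0);
            assert(WORKQ_IDX(WORKQ_THREAD_QOS_MAX) == WORKQ_NUM_QOS_BUCKETS - 1);
            assert(THREAD_QOS_LAST > WORKQ_THREAD_QOS_MAX); /* manager outside */
            return 0;
    }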
diff --git a/libpthread.xcodeproj/project.pbxproj b/libpthread.xcodeproj/project.pbxproj
index 33df53770ed9fd472894ec34b84a3cd3e4aaaba0..1c4fd1a07292ce8965a0a5ba9daf68fccd0f5044 100644
 /* End PBXAggregateTarget section */
 
 /* Begin PBXBuildFile section */
+               6E2A3BBE2101222F0003B53B /* stack_np.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E2A3BBD210122230003B53B /* stack_np.h */; settings = {ATTRIBUTES = (Public, ); }; };
+               6E2A3BBF210122300003B53B /* stack_np.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E2A3BBD210122230003B53B /* stack_np.h */; settings = {ATTRIBUTES = (Public, ); }; };
+               6E2A3BC0210122340003B53B /* stack_np.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E2A3BBD210122230003B53B /* stack_np.h */; };
+               6E5869C720C9040A00F1CB75 /* dependency_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5869C620C8FE8300F1CB75 /* dependency_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
+               6E5869C820C9040B00F1CB75 /* dependency_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5869C620C8FE8300F1CB75 /* dependency_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
+               6E5869C920C9040C00F1CB75 /* dependency_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5869C620C8FE8300F1CB75 /* dependency_private.h */; };
+               6E5869CB20C9043200F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869CC20C9043B00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869CD20C9043B00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869CE20C9043C00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869CF20C9043C00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869D020C9043D00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869D120C9043D00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
+               6E5869D220C9043E00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; };
                6E8C16541B14F08A00C8987C /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EB232C91B0EB29D005915CE /* resolver.c */; };
                6E8C16551B14F08A00C8987C /* pthread.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325FA15B7513200270056 /* pthread.c */; };
                6E8C16561B14F08A00C8987C /* pthread_cancelable.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325F115B7513200270056 /* pthread_cancelable.c */; };
                C9A1BF5015C9A59B006BB313 /* sched.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A3260115B7513700270056 /* sched.h */; settings = {ATTRIBUTES = (Public, ); }; };
                C9A1BF5315C9A9F5006BB313 /* pthread_cancelable_cancel.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A1BF5215C9A9F5006BB313 /* pthread_cancelable_cancel.c */; };
                C9A1BF5515C9CB9D006BB313 /* pthread_cancelable_legacy.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A1BF5415C9CB9D006BB313 /* pthread_cancelable_legacy.c */; };
-               C9A960B0183EB42700AE10C8 /* kern_policy.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A960AF183EB42700AE10C8 /* kern_policy.c */; };
                C9BB478B15E6ABD900F135B7 /* workqueue_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325F915B7513200270056 /* workqueue_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
                C9BB478D15E6ADF700F135B7 /* tsd_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325F415B7513200270056 /* tsd_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
                C9CCFB9D18B6D0910060CAAE /* qos_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C99B17DA189C2E1B00991D38 /* qos_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
                        containerPortal = C9A325D915B7347000270056 /* Project object */;
                        proxyType = 1;
                        remoteGlobalIDString = E4F4498C1E82C1F000A7FB9A;
-                       remoteInfo = "libpthread alt resolved";
+                       remoteInfo = "libpthread armv81 resolved";
                };
 /* End PBXContainerItemProxy section */
 
 /* Begin PBXFileReference section */
+               6E2A3BBD210122230003B53B /* stack_np.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stack_np.h; sourceTree = "<group>"; };
+               6E514A0220B67C0900844EE1 /* offsets.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = offsets.h; sourceTree = "<group>"; };
+               6E5869C620C8FE8300F1CB75 /* dependency_private.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = dependency_private.h; sourceTree = "<group>"; };
+               6E5869CA20C9043200F1CB75 /* pthread_dependency.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = pthread_dependency.c; sourceTree = "<group>"; };
                6E8C16801B14F08A00C8987C /* libsystem_pthread.dylib */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.dylib"; includeInIndex = 0; path = libsystem_pthread.dylib; sourceTree = BUILT_PRODUCTS_DIR; };
                6E8C16851B14F14000C8987C /* pthread_introspection.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = pthread_introspection.xcconfig; sourceTree = "<group>"; };
                6EB232C91B0EB29D005915CE /* resolver.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = resolver.c; sourceTree = "<group>"; };
                C9A3260015B7513700270056 /* pthread_spis.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = pthread_spis.h; sourceTree = "<group>"; };
                C9A3260115B7513700270056 /* sched.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sched.h; sourceTree = "<group>"; };
                C9A3260C15B759B600270056 /* pthread.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = pthread.xcconfig; sourceTree = "<group>"; };
-               C9A960AF183EB42700AE10C8 /* kern_policy.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kern_policy.c; sourceTree = "<group>"; };
                C9A960B318452B2F00AE10C8 /* pthread.py */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.python; path = pthread.py; sourceTree = "<group>"; };
                C9A960B618452CDD00AE10C8 /* install-lldbmacros.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "install-lldbmacros.sh"; sourceTree = "<group>"; };
                C9C2212D15FA978D00447568 /* pthread.aliases */ = {isa = PBXFileReference; lastKnownFileType = text; path = pthread.aliases; sourceTree = "<group>"; };
                E4943AA71E80BD8400D2A961 /* resolver_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = resolver_internal.h; sourceTree = "<group>"; };
                E4D962F919086AD600E8A9F2 /* qos.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = qos.h; sourceTree = "<group>"; };
                E4D962FC19086C5700E8A9F2 /* install-sys-headers.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; path = "install-sys-headers.sh"; sourceTree = "<group>"; };
-               E4F449A01E82C1F000A7FB9A /* libpthread_alt.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libpthread_alt.a; sourceTree = BUILT_PRODUCTS_DIR; };
+               E4F449A01E82C1F000A7FB9A /* libpthread_armv81.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libpthread_armv81.a; sourceTree = BUILT_PRODUCTS_DIR; };
                E4F449A31E82CF0100A7FB9A /* resolver.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = resolver.xcconfig; sourceTree = "<group>"; };
                E4F449D41E82D03500A7FB9A /* libsystem_pthread.dylib */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.dylib"; includeInIndex = 0; path = libsystem_pthread.dylib; sourceTree = BUILT_PRODUCTS_DIR; };
                FC30E28D16A747AD00A25B5F /* synch_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = synch_internal.h; sourceTree = "<group>"; };
                                C9D9E8FE1626248800448CED /* pthread-Info.plist */,
                                C9C533841607C928009988FA /* kern_internal.h */,
                                C9169DDF1603DF9B005A2F8C /* kern_init.c */,
-                               C9A960AF183EB42700AE10C8 /* kern_policy.c */,
                                C9169DDB1603DE84005A2F8C /* kern_synch.c */,
                                C9169DDC1603DE84005A2F8C /* kern_support.c */,
                                C979E9FB18A1BC2A000951E5 /* kern_trace.h */,
                                6E8C16801B14F08A00C8987C /* libsystem_pthread.dylib */,
                                C04545B81C584F4A006A53B3 /* libpthread.a */,
                                E41505E71E818BEB00F243FB /* libpthread_mp.a */,
-                               E4F449A01E82C1F000A7FB9A /* libpthread_alt.a */,
+                               E4F449A01E82C1F000A7FB9A /* libpthread_armv81.a */,
                                E4F449D41E82D03500A7FB9A /* libsystem_pthread.dylib */,
                        );
                        name = Products;
                C9A325ED15B74FB600270056 /* src */ = {
                        isa = PBXGroup;
                        children = (
+                               6E514A0220B67C0900844EE1 /* offsets.h */,
                                C9A325F315B7513200270056 /* internal.h */,
                                C9A325EF15B7513200270056 /* plockstat.d */,
                                C9A325FA15B7513200270056 /* pthread.c */,
                                C9A325F215B7513200270056 /* pthread_cond.c */,
                                924D8EDE1C11832A002AC2BC /* pthread_cwd.c */,
                                C9A325F515B7513200270056 /* pthread_mutex.c */,
+                               6E5869CA20C9043200F1CB75 /* pthread_dependency.c */,
                                C9A325F615B7513200270056 /* pthread_rwlock.c */,
                                C975D5DC15C9D16B0098ECD8 /* pthread_support.c */,
                                C9A325F815B7513200270056 /* pthread_tsd.c */,
                                C9A3260015B7513700270056 /* pthread_spis.h */,
                                C9A3260115B7513700270056 /* sched.h */,
                                C98C95D818FF1F4E005654FB /* spawn.h */,
+                               6E2A3BBD210122230003B53B /* stack_np.h */,
                                C9244C1A185FCFED00075748 /* qos.h */,
                        );
                        path = pthread;
                                E4657D4017284F7B007D1847 /* introspection_private.h */,
                                C99B17DA189C2E1B00991D38 /* qos_private.h */,
                                E4063CF21906B4FB000202F9 /* qos.h */,
+                               6E5869C620C8FE8300F1CB75 /* dependency_private.h */,
                        );
                        path = private;
                        sourceTree = "<group>";
                        isa = PBXHeadersBuildPhase;
                        buildActionMask = 2147483647;
                        files = (
+                               6E2A3BC0210122340003B53B /* stack_np.h in Headers */,
                                6E8C16711B14F08A00C8987C /* posix_sched.h in Headers */,
                                6E8C166F1B14F08A00C8987C /* introspection_private.h in Headers */,
                                E41A64AE1E83C470009479A9 /* introspection.h in Headers */,
                                6E8C166B1B14F08A00C8987C /* pthread_impl.h in Headers */,
                                6E8C166D1B14F08A00C8987C /* pthread_spis.h in Headers */,
                                6E8C166E1B14F08A00C8987C /* sched.h in Headers */,
+                               6E5869C920C9040C00F1CB75 /* dependency_private.h in Headers */,
                                6E8C16751B14F08A00C8987C /* spawn.h in Headers */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                        isa = PBXHeadersBuildPhase;
                        buildActionMask = 2147483647;
                        files = (
+                               6E2A3BBE2101222F0003B53B /* stack_np.h in Headers */,
                                C9244C1B185FD33000075748 /* qos.h in Headers */,
                                C9A1BF4D15C9A58E006BB313 /* pthread.h in Headers */,
                                C9A1BF4E15C9A594006BB313 /* pthread_impl.h in Headers */,
                                C98C95D918FF1F4E005654FB /* spawn.h in Headers */,
                                C99AD87C15DEC5290009A6F8 /* spinlock_private.h in Headers */,
                                C9BB478B15E6ABD900F135B7 /* workqueue_private.h in Headers */,
+                               6E5869C720C9040A00F1CB75 /* dependency_private.h in Headers */,
                                C9153096167ACC2B006BB094 /* private.h in Headers */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                        isa = PBXHeadersBuildPhase;
                        buildActionMask = 2147483647;
                        files = (
+                               6E2A3BBF210122300003B53B /* stack_np.h in Headers */,
                                E4F449BE1E82D03500A7FB9A /* qos.h in Headers */,
                                E4F449BF1E82D03500A7FB9A /* pthread.h in Headers */,
                                E4F449C01E82D03500A7FB9A /* pthread_impl.h in Headers */,
                                E4F449C91E82D03500A7FB9A /* spawn.h in Headers */,
                                E4F449CA1E82D03500A7FB9A /* spinlock_private.h in Headers */,
                                E4F449CB1E82D03500A7FB9A /* workqueue_private.h in Headers */,
+                               6E5869C820C9040B00F1CB75 /* dependency_private.h in Headers */,
                                E4F449CC1E82D03500A7FB9A /* private.h in Headers */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                        productReference = E41505E71E818BEB00F243FB /* libpthread_mp.a */;
                        productType = "com.apple.product-type.library.static";
                };
-               E4F4498C1E82C1F000A7FB9A /* libpthread alt resolved */ = {
+               E4F4498C1E82C1F000A7FB9A /* libpthread armv81 resolved */ = {
                        isa = PBXNativeTarget;
-                       buildConfigurationList = E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread alt resolved" */;
+                       buildConfigurationList = E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread armv81 resolved" */;
                        buildPhases = (
                                E4F4498D1E82C1F000A7FB9A /* Sources */,
                                E4F4499C1E82C1F000A7FB9A /* Symlink normal variant */,
                        );
                        dependencies = (
                        );
-                       name = "libpthread alt resolved";
-                       productName = libpthread_alt.a;
-                       productReference = E4F449A01E82C1F000A7FB9A /* libpthread_alt.a */;
+                       name = "libpthread armv81 resolved";
+                       productName = libpthread_armv81.a;
+                       productReference = E4F449A01E82C1F000A7FB9A /* libpthread_armv81.a */;
                        productType = "com.apple.product-type.library.static";
                };
                E4F449A41E82D03500A7FB9A /* libsystem_pthread noresolver */ = {
                                E4F449A41E82D03500A7FB9A /* libsystem_pthread noresolver */,
                                6E8C16511B14F08A00C8987C /* libsystem_pthread introspection */,
                                E41505D01E818BEB00F243FB /* libpthread mp resolved */,
-                               E4F4498C1E82C1F000A7FB9A /* libpthread alt resolved */,
+                               E4F4498C1E82C1F000A7FB9A /* libpthread armv81 resolved */,
                                C04545A21C584F4A006A53B3 /* libpthread generic */,
                                C90E7A9E15DC3C3800A06D48 /* libpthread dyld */,
                                74E594911613AAF4006C417B /* libpthread eOS */,
                                6E8C16631B14F08A00C8987C /* pthread_support.c in Sources */,
                                6E8C16641B14F08A00C8987C /* thread_setup.c in Sources */,
                                6E8C16651B14F08A00C8987C /* pthread_atfork.c in Sources */,
+                               6E5869CD20C9043B00F1CB75 /* pthread_dependency.c in Sources */,
                                6E8C16661B14F08A00C8987C /* pthread_asm.s in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                                6EB232D01B0EB325005915CE /* resolver.c in Sources */,
                                74E594931613AAF4006C417B /* pthread.c in Sources */,
                                74E594941613AAF4006C417B /* pthread_cancelable.c in Sources */,
+                               6E5869D220C9043E00F1CB75 /* pthread_dependency.c in Sources */,
                                74E594A61613AB10006C417B /* pthread_cancelable_cancel.c in Sources */,
                                74E594951613AAF4006C417B /* pthread_cond.c in Sources */,
                                74E594961613AAF4006C417B /* pthread_mutex.c in Sources */,
                                C04545A41C584F4A006A53B3 /* resolver.c in Sources */,
                                C04545A51C584F4A006A53B3 /* pthread.c in Sources */,
                                C04545A61C584F4A006A53B3 /* pthread_cancelable.c in Sources */,
+                               6E5869D020C9043D00F1CB75 /* pthread_dependency.c in Sources */,
                                C04545A71C584F4A006A53B3 /* pthread_cancelable_cancel.c in Sources */,
                                C04545A81C584F4A006A53B3 /* pthread_cond.c in Sources */,
                                C04545A91C584F4A006A53B3 /* pthread_mutex.c in Sources */,
                                C90E7AA515DC3C9D00A06D48 /* pthread_cancelable.c in Sources */,
                                C90E7AA615DC3C9D00A06D48 /* pthread_cond.c in Sources */,
                                C90E7AA715DC3C9D00A06D48 /* pthread_mutex.c in Sources */,
+                               6E5869D120C9043D00F1CB75 /* pthread_dependency.c in Sources */,
                                C90E7AA815DC3C9D00A06D48 /* pthread_rwlock.c in Sources */,
                                C90E7AA915DC3C9D00A06D48 /* pthread_support.c in Sources */,
                                C90E7AAA15DC3C9D00A06D48 /* pthread_tsd.c in Sources */,
                                C975D5DD15C9D16B0098ECD8 /* pthread_support.c in Sources */,
                                C948FCF715D1D1E100180BF5 /* thread_setup.c in Sources */,
                                C90E7AB815DC40D900A06D48 /* pthread_atfork.c in Sources */,
+                               6E5869CB20C9043200F1CB75 /* pthread_dependency.c in Sources */,
                                C99AD88015E2D8B50009A6F8 /* pthread_asm.s in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                        buildActionMask = 2147483647;
                        files = (
                                C9169DDE1603DE84005A2F8C /* kern_support.c in Sources */,
-                               C9A960B0183EB42700AE10C8 /* kern_policy.c in Sources */,
                                C9169DE01603DF9B005A2F8C /* kern_init.c in Sources */,
                                C9D75E4216127B3900C2FB26 /* kern_synch.c in Sources */,
                        );
                                E41505D21E818BEB00F243FB /* resolver.c in Sources */,
                                E41505D31E818BEB00F243FB /* pthread.c in Sources */,
                                E41505D41E818BEB00F243FB /* pthread_cancelable.c in Sources */,
+                               6E5869CE20C9043C00F1CB75 /* pthread_dependency.c in Sources */,
                                E41505D51E818BEB00F243FB /* pthread_cancelable_cancel.c in Sources */,
                                E41505D61E818BEB00F243FB /* pthread_cond.c in Sources */,
                                E41505D71E818BEB00F243FB /* pthread_mutex.c in Sources */,
                                E4F4498E1E82C1F000A7FB9A /* resolver.c in Sources */,
                                E4F4498F1E82C1F000A7FB9A /* pthread.c in Sources */,
                                E4F449901E82C1F000A7FB9A /* pthread_cancelable.c in Sources */,
+                               6E5869CF20C9043C00F1CB75 /* pthread_dependency.c in Sources */,
                                E4F449911E82C1F000A7FB9A /* pthread_cancelable_cancel.c in Sources */,
                                E4F449921E82C1F000A7FB9A /* pthread_cond.c in Sources */,
                                E4F449931E82C1F000A7FB9A /* pthread_mutex.c in Sources */,
                                E4F449B81E82D03500A7FB9A /* pthread_support.c in Sources */,
                                E4F449B91E82D03500A7FB9A /* thread_setup.c in Sources */,
                                E4F449BA1E82D03500A7FB9A /* pthread_atfork.c in Sources */,
+                               6E5869CC20C9043B00F1CB75 /* pthread_dependency.c in Sources */,
                                E4F449BB1E82D03500A7FB9A /* pthread_asm.s in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                E4F449A21E82C5A400A7FB9A /* PBXTargetDependency */ = {
                        isa = PBXTargetDependency;
-                       target = E4F4498C1E82C1F000A7FB9A /* libpthread alt resolved */;
+                       target = E4F4498C1E82C1F000A7FB9A /* libpthread armv81 resolved */;
                        targetProxy = E4F449A11E82C5A400A7FB9A /* PBXContainerItemProxy */;
                };
 /* End PBXTargetDependency section */
                        isa = XCBuildConfiguration;
                        baseConfigurationReference = E41505E81E818D4D00F243FB /* resolved.xcconfig */;
                        buildSettings = {
-                               RESOLVED_VARIANT = alt;
+                               RESOLVED_VARIANT = armv81;
                        };
                        name = Release;
                };
                        isa = XCBuildConfiguration;
                        baseConfigurationReference = E41505E81E818D4D00F243FB /* resolved.xcconfig */;
                        buildSettings = {
-                               RESOLVED_VARIANT = alt;
+                               RESOLVED_VARIANT = armv81;
                        };
                        name = Debug;
                };
                        defaultConfigurationIsVisible = 0;
                        defaultConfigurationName = Release;
                };
-               E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread alt resolved" */ = {
+               E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread armv81 resolved" */ = {
                        isa = XCConfigurationList;
                        buildConfigurations = (
                                E4F4499E1E82C1F000A7FB9A /* Release */,
diff --git a/lldbmacros/init.py b/lldbmacros/init.py
new file mode 100644 (file)
index 0000000..af7fe69
--- /dev/null
@@ -0,0 +1,310 @@
+from xnu import *
+import struct
+
+def GetSeqCount(seq):
+       return (seq >> 8)
+
+def GetLSeqBits(seq):
+       rv = ""
+       if seq & 0x1:
+               rv += "K"
+       if seq & 0x2:
+               rv += "E"
+       if seq & 0x4:
+               rv += "W"
+       if seq & 0x20:
+               rv += "M"
+       if seq & 0x40:
+               rv += "U"
+       if seq & 0x80:
+               rv += "I"
+       return rv
+
+def GetSSeqBits(seq):
+       rv = ""
+       if seq & 0x1:
+               rv += "S"
+       if seq & 0x2:
+               rv += "I"
+       if seq & 0x4:
+               rv += "Ws"
+       return rv
+
+def GetLSeqSummary(seq):
+       return "{:d} {:s}".format(GetSeqCount(seq), GetLSeqBits(seq))
+
+def GetSSeqSummary(seq):
+       return "{:d} {:s}".format(GetSeqCount(seq), GetSSeqBits(seq))
+
+@header("{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}".format('sig', 'tid', 'options', 'lseq', 'useq'))
+def GetUserMutexSummary(task, uaddr):
+       if int(task.t_flags) & 0x1:
+               mtxlayout = "QIIhhIQIII"
+               padoffset = 1
+       else:
+               mtxlayout = "QIIhhQIII"
+               padoffset = 0
+
+       data = GetUserDataAsString(task, unsigned(uaddr), struct.calcsize(mtxlayout))
+       info = struct.unpack(mtxlayout, data)
+
+       format = "{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}"
+       sigstr = str("{0: <#020x}".format(info[0]))
+
+       # the options field dictates whether we were created misaligned
+       if info[2] & 0x800:
+               lseq = info[7+padoffset]
+               useq = info[8+padoffset]
+       else:
+               lseq = info[6+padoffset]
+               useq = info[7+padoffset]
+
+       return format.format(sigstr, hex(info[5+padoffset]), hex(info[2]), hex(lseq), hex(useq))
+
+@lldb_command('showusermutex')
+def PthreadShowUserMutex(cmd_args=None):
+       """
+       display information about a userspace mutex at a given address
+       Syntax: (lldb) showusermutex <task_t> <uaddr>
+       """
+       if not cmd_args:
+               raise ArgumentError("No arguments passed")
+       task = kern.GetValueFromAddress(cmd_args[0], "task_t")
+       uaddr = kern.GetValueFromAddress(cmd_args[1], "user_addr_t")
+
+       print GetUserMutexSummary.header
+       print GetUserMutexSummary(task, uaddr)
+
+@lldb_type_summary(['ksyn_wait_queue *', 'ksyn_wait_queue_t'])
+@header("{:<20s} {:<20s} {:<10s} {:<6s} {:<6s} {:<8s} {:<8s} {:<8s} {:<8s}".format('kwq', 'uaddr', 'type', 'pflags', 'kflags', 'refs', 'indrop', 'waiters', 'preposts'))
+def GetKwqSummary(kwq):
+       format = "{:<#20x} {:<#20x} {:<10s} {:<6s} {:<6s} {:<8d} {:<8d} {:<8d} {:<8d}\n"
+       kwq = Cast(kwq, "ksyn_wait_queue_t")
+
+       kwqtype = ""
+       if kwq.kw_type & 0xff == 0x01:
+               kwqtype = "mtx"
+       if kwq.kw_type & 0xff == 0x02:
+               kwqtype = "cvar"
+       if kwq.kw_type & 0xff == 0x04:
+               kwqtype = "rwl"
+       if kwq.kw_type & 0xff == 0x05:
+               kwqtype = "sema"
+
+       if kwq.kw_type & 0x1000 == 0x1000:
+               kwqtype += "W" # INWAIT
+       if kwq.kw_type & 0x2000 == 0x2000:
+               kwqtype += "D" # INDROP
+
+       pflags = ""
+       if kwq.kw_pflags & 0x2:
+               pflags += "H" # INHASH
+       if kwq.kw_pflags & 0x4:
+               pflags += "S" # SHARED
+       if kwq.kw_pflags & 0x8:
+               pflags += "W" # WAITING
+       if kwq.kw_pflags & 0x10:
+               pflags += "F" # FREELIST
+
+       kflags = ""
+       if kwq.kw_kflags & 0x1:
+               kflags += "C" # INITCLEARED
+       if kwq.kw_kflags & 0x2:
+               kflags += "Z" # ZEROED
+       if kwq.kw_kflags & 0x4:
+               kflags += "Q" # QOS APPLIED
+       if kwq.kw_kflags & 0x8:
+               kflags += "O" # OVERLAP
+
+       rs = format.format(kwq, kwq.kw_addr, kwqtype, pflags, kflags, kwq.kw_iocount, kwq.kw_dropcount, kwq.kw_inqueue, kwq.kw_fakecount)
+
+       rs += "\t{:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s}\n".format('lowest', 'highest', 'lword', 'uword', 'sword', 'last', 'next')
+       rs += "\t{:<10d} {:<10d} {:<10s} {:<10d} {:<10s} {:<10s} {:<10s}\n".format(
+                       GetSeqCount(kwq.kw_lowseq), GetSeqCount(kwq.kw_highseq),
+                       GetLSeqSummary(kwq.kw_lword), GetSeqCount(kwq.kw_uword),
+                       GetSSeqSummary(kwq.kw_sword), GetSSeqSummary(kwq.kw_lastseqword),
+                       GetSSeqSummary(kwq.kw_nextseqword))
+
+       rs += "\t{:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s}\n".format(
+                       'pposts', 'lseq', 'sseq', 'intr', 'count', 'seq', 'bits')
+
+       intr_type = "NONE"
+       if kwq.kw_intr.type == 0x1:
+               intr_type = "READ"
+       elif kwq.kw_intr.type == 0x2:
+               intr_type = "WRITE"
+
+       rs += "\t{:<10d} {:<10s} {:<10s} {:<10s} {:<10d} {:<10s} {:<10s}\n".format(
+                       kwq.kw_prepost.count,
+                       GetLSeqSummary(kwq.kw_prepost.lseq), GetSSeqSummary(kwq.kw_prepost.sseq),
+                       intr_type, kwq.kw_intr.count,
+                       GetSSeqSummary(kwq.kw_intr.seq), GetSSeqSummary(kwq.kw_intr.returnbits))
+
+       rs += "\twaiting readers:\n"
+       for kwe in IterateTAILQ_HEAD(kwq.kw_ksynqueues[0].ksynq_kwelist, 'kwe_list'):
+               rs += "\t" + GetKweSummary.header + "\n"
+               rs += "\t" + GetKweSummary(kwe) + "\n"
+
+       rs += "\twaiting writers:\n"
+       for kwe in IterateTAILQ_HEAD(kwq.kw_ksynqueues[1].ksynq_kwelist, 'kwe_list'):
+               rs += "\t" + GetKweSummary.header + "\n"
+               rs += "\t" + GetKweSummary(kwe) + "\n"
+
+       if kwq.kw_turnstile:
+               rs += GetTurnstileSummary.header + "\n"
+               rs += GetTurnstileSummary(Cast(kwq.kw_turnstile, "struct turnstile *"))
+
+       return rs
+
+@lldb_type_summary(['ksyn_waitq_element *', 'ksyn_waitq_element_t'])
+@header("{:<20s} {:<20s} {:<10s} {:<10s} {:<20s} {:<20s}".format('kwe', 'kwq', 'lseq', 'state', 'uthread', 'thread'))
+def GetKweSummary(kwe):
+       format = "{:<#20x} {:<#20x} {:<10s} {:<10s} {:<#20x} {:<#20x}"
+       kwe = Cast(kwe, 'struct ksyn_waitq_element *')
+       state = ""
+       if kwe.kwe_state == 1:
+               state = "INWAIT"
+       elif kwe.kwe_state == 2:
+               state = "PPOST"
+       elif kwe.kwe_state == 3:
+               state = "BROAD"
+       else:
+               state = "{:#10x}".format(kwe.kwe_state)
+       return format.format(kwe, kwe.kwe_kwqqueue, GetLSeqSummary(kwe.kwe_lockseq), state, kwe.kwe_uth, kwe.kwe_thread)
+
+@header("{0: <24s} {1: <24s} {2: <24s}".format('thread', 'thread_id', 'uthread'))
+def GetPthreadSummary(thread):
+       format = "{0: <24s} {1: <24s} {2: <24s}"
+
+       threadstr = str("{0: <#020x}".format(thread))
+       if int(thread.static_param):
+               threadstr += "[WQ]"
+
+       uthread = Cast(thread.uthread, "uthread_t")
+       uthreadstr = str("{0: <#020x}".format(uthread))
+
+
+       return format.format(threadstr, hex(thread.thread_id), uthreadstr)
+
+@header("{0: <24s} {1: <24s} {2: <10s} {3: <10s} {4: <10s} {5: <10s} {6: <10s}".format('proc', 'wq', 'sched', 'req', 'idle', 'wq_flags', 'wq_lflags'))
+def GetPthreadWorkqueueSummary(wq):
+       format = "{0: <24s} {1: <24s} {2: <10d} {3: <10d} {4: <10d} {5: <10s} {6: <10s}"
+       procstr = str("{0: <#020x}".format(wq.wq_proc))
+       wqstr = str("{0: <#020x}".format(wq))
+       
+       flags = []
+       if wq.wq_flags & 0x1:
+               flags.append("I")
+       if wq.wq_flags & 0x2:
+               flags.append("R")
+       if wq.wq_flags & 0x4:
+               flags.append("E")
+               
+       wqflags = []
+       if wq.wq_lflags & 0x1:
+               wqflags.append("B")
+       if wq.wq_lflags & 0x2:
+               wqflags.append("W")
+       if wq.wq_lflags & 0x4:
+               wqflags.append("C")
+       if wq.wq_lflags & 0x8:
+               wqflags.append("L")
+       
+       return format.format(procstr, wqstr, wq.wq_threads_scheduled, wq.wq_reqcount, wq.wq_thidlecount, "".join(flags), "".join(wqflags))
+
+@header("{0: <24s} {1: <5s} {2: <5s} {3: <5s} {4: <5s} {5: <5s} {6: <5s} {7: <5s}".format('category', 'uint', 'uinit', 'lgcy', 'util', 'bckgd', 'maint', 'event'))
+def GetPthreadWorkqueueDetail(wq):
+       format = "  {0: <22s} {1: <5d} {2: <5d} {3: <5d} {4: <5d} {5: <5d} {6: <5d} {7: <5d}"
+       # requests
+       schedstr = format.format('scheduled', wq.wq_thscheduled_count[0], wq.wq_thscheduled_count[1], wq.wq_thscheduled_count[2], wq.wq_thscheduled_count[3], wq.wq_thscheduled_count[4], wq.wq_thscheduled_count[5], wq.wq_thscheduled_count[6])
+       activestr = format.format('active', wq.wq_thactive_count[0], wq.wq_thactive_count[1], wq.wq_thactive_count[2], wq.wq_thactive_count[3], wq.wq_thactive_count[4], wq.wq_thactive_count[5], wq.wq_thactive_count[6])
+       return "\n".join([schedstr, activestr])
+
+@lldb_command('showthreadpsynch')
+def PthreadCurrentMutex(cmd_args=None):
+       """
+       display information about a thread's pthread state
+	Syntax: (lldb) showthreadpsynch <thread_t>
+       """
+       if not cmd_args:
+               raise ArgumentError("No arguments passed")
+
+       thread = kern.GetValueFromAddress(cmd_args[0], "thread_t")
+       print GetPthreadSummary.header
+       print GetPthreadSummary(thread)
+
+       uthread = Cast(thread.uthread, "uthread_t")
+       kwe = Cast(addressof(uthread.uu_save.uus_kwe), 'struct ksyn_waitq_element *')
+       if not kwe or not kwe.kwe_kwqqueue:
+               print GetKweSummary.header
+               print GetKweSummary(kwe)
+       else:
+               print GetKwqSummary.header
+               print GetKwqSummary(kwe.kwe_kwqqueue)
+
+@lldb_command('showpthreadkwq')
+def PthreadShowKsynQueue(cmd_args=None):
+       """
+       display information about a pthread ksyn_wait_queue_t
+       Syntax: (lldb) showpthreadkwq <ksyn_wait_queue_t>
+       """
+       if not cmd_args:
+               raise ArgumentError("No arguments passed")
+
+       kwq = kern.GetValueFromAddress(cmd_args[0], "ksyn_wait_queue_t")
+       print GetKwqSummary.header
+       print GetKwqSummary(kwq)
+
+@lldb_command('showpthreadkwe')
+def PthreadShowKsynElement(cmd_args=None):
+       """
+       display information about a thread's ksyn_waitq_element
+       Syntax: (lldb) showpthreadkwe <ksyn_waitq_element_t>    
+       """
+       if not cmd_args:
+               raise ArgumentError("No arguments passed")
+
+       kwe = kern.GetValueFromAddress(cmd_args[0], "struct ksyn_waitq_element *")
+       print GetKweSummary.header
+       print GetKweSummary(kwe)
+
+@lldb_command('showpthreadworkqueue')
+def ShowPthreadWorkqueue(cmd_args=None):
+       """
+	display information about a process's pthread workqueue
+       Syntax: (lldb) showpthreadworkqueue <proc_t>
+       """
+       
+       if not cmd_args:
+               raise ArgumentError("No arguments passed")
+               
+       proc = kern.GetValueFromAddress(cmd_args[0], "proc_t")
+	wq = Cast(proc.p_wqptr, "struct workqueue *")
+       
+       print GetPthreadWorkqueueSummary.header
+       print GetPthreadWorkqueueSummary(wq)
+       
+       print GetPthreadWorkqueueDetail.header
+       print GetPthreadWorkqueueDetail(wq)
+
+def IterateTAILQ_HEAD(headval, element_name):
+    """ iterate over a TAILQ_HEAD in kernel. refer to bsd/sys/queue.h
+        params:
+            headval     - value : value object representing the head of the list
+            element_name- str          :  string name of the field which holds the list links.
+        returns:
+            A generator does not return. It is used for iterating.
+            A generator; it does not return and is used for iterating.
+            value : an object of the same type as headval->tqh_first. Always a pointer object
+          list_head = kern.GetGlobalVariable('mountlist')
+          for entryobj in IterateTAILQ_HEAD(list_head, 'mnt_list'):
+            print GetEntrySummary(entryobj)
+    """
+    iter_val = headval.tqh_first
+    while unsigned(iter_val) != 0 :
+        yield iter_val
+        iter_val = iter_val.__getattr__(element_name).tqe_next
+    #end of yield loop
+
+def __lldb_init_module(debugger, internal_dict):
+       pass
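Once installed via xcodescripts/install-lldbmacros.sh, these macros are driven
by address from an lldb kernel-debug session; the addresses below are
illustrative, not taken from a real session:

    (lldb) showthreadpsynch 0xffffff80223cd000
    (lldb) showpthreadkwq 0xffffff8031fe5a00
    (lldb) showusermutex 0xffffff8029aa3000 0x10c4a5040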
diff --git a/lldbmacros/pthread.py b/lldbmacros/pthread.py
deleted file mode 100644 (file)
index a24779c..0000000
+++ /dev/null
@@ -1,152 +0,0 @@
-from xnu import *
-import struct
-
-@header("{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}".format('sig', 'tid', 'options', 'lseq', 'useq'))
-def GetUserMutexSummary(task, uaddr):
-       if int(task.t_flags) & 0x1:
-               mtxlayout = "QIIhhIQIII"
-               padoffset = 1
-       else:
-               mtxlayout = "QIIhhQIII"
-               padoffset = 0
-
-       data = GetUserDataAsString(task, uaddr, struct.calcsize(mtxlayout))
-       info = struct.unpack(mtxlayout, data)
-
-       format = "{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}"
-       sigstr = str("{0: <#020x}".format(info[0]))
-
-       # the options field dictates whether we were created misaligned
-       if info[2] & 0x800:
-               lseq = info[7+padoffset]
-               useq = info[8+padoffset]
-       else:
-               lseq = info[6+padoffset]
-               useq = info[7+padoffset]
-
-       return format.format(sigstr, hex(info[5+padoffset]), hex(info[2]), hex(lseq), hex(useq))
-
-@lldb_command('showusermutex')
-def PthreadShowUserMutex(cmd_args=None):
-       """
-       display information about a userspace mutex at a given address
-       Syntax: (lldb) showusermutex <task_t> <uaddr>
-       """
-       if not cmd_args:
-               raise ArgumentError("No arguments passed")
-       task = kern.GetValueFromAddress(cmd_args[0], "task_t")
-       uaddr = kern.GetValueFromAddress(cmd_args[1], "user_addr_t")
-
-       print GetUserMutexSummary.header
-       print GetUserMutexSummary(task, uaddr)
-
-@lldb_type_summary(['ksyn_waitq_element *', 'ksyn_waitq_element_t'])
-@header("{0: <24s} {1: <24s} {2: <24s} {3: <10s}".format('kwe', 'kwq', 'uaddr', 'type'))
-def GetKweSummary(kwe):
-       format = "{0: <24s} {1: <24s} {2: <24s} {3: <10s}"
-       kwe = Cast(addressof(kwe), "ksyn_waitq_element_t")
-       kwestr = str("{0: <#020x}".format(kwe))
-
-       kwq = Cast(kwe.kwe_kwqqueue, "ksyn_wait_queue_t")
-       kwqstr = str("{0: <#020x}".format(kwq))
-       uaddrstr = str("{0: <#020x}".format(kwq.kw_addr))
-
-       kwqtype = ""
-       if kwq.kw_type & 0xff == 0x01:
-               kwqtype = "mtx"
-       if kwq.kw_type & 0xff == 0x02:
-               kwqtype = "cvar"
-       if kwq.kw_type & 0xff == 0x04:
-               kwqtype = "rwlock"
-       if kwq.kw_type & 0xff == 0x05:
-               kwqtype = "sema"
-
-       return format.format(kwestr, kwqstr, uaddrstr, kwqtype)
-
-@header("{0: <24s} {1: <24s} {2: <24s}".format('thread', 'thread_id', 'uthread'))
-def GetPthreadSummary(thread):
-       format = "{0: <24s} {1: <24s} {2: <24s}"
-
-       threadstr = str("{0: <#020x}".format(thread))
-       if int(thread.static_param):
-               threadstr += "[WQ]"
-
-       uthread = Cast(thread.uthread, "uthread_t")
-       uthreadstr = str("{0: <#020x}".format(uthread))
-
-
-       return format.format(threadstr, hex(thread.thread_id), uthreadstr)
-
-@header("{0: <24s} {1: <24s} {2: <10s} {3: <10s} {4: <10s} {5: <10s} {6: <10s}".format('proc', 'wq', 'sched', 'req', 'idle', 'wq_flags', 'wq_lflags'))
-def GetPthreadWorkqueueSummary(wq):
-       format = "{0: <24s} {1: <24s} {2: <10d} {3: <10d} {4: <10d} {5: <10s} {6: <10s}"
-       procstr = str("{0: <#020x}".format(wq.wq_proc))
-       wqstr = str("{0: <#020x}".format(wq))
-       
-       flags = []
-       if wq.wq_flags & 0x1:
-               flags.append("I")
-       if wq.wq_flags & 0x2:
-               flags.append("R")
-       if wq.wq_flags & 0x4:
-               flags.append("E")
-               
-       wqflags = []
-       if wq.wq_lflags & 0x1:
-               wqflags.append("B")
-       if wq.wq_lflags & 0x2:
-               wqflags.append("W")
-       if wq.wq_lflags & 0x4:
-               wqflags.append("C")
-       if wq.wq_lflags & 0x8:
-               wqflags.append("L")
-       
-       return format.format(procstr, wqstr, wq.wq_threads_scheduled, wq.wq_reqcount, wq.wq_thidlecount, "".join(flags), "".join(wqflags))
-
-@header("{0: <24s} {1: <5s} {2: <5s} {3: <5s} {4: <5s} {5: <5s} {6: <5s} {7: <5s}".format('category', 'uint', 'uinit', 'lgcy', 'util', 'bckgd', 'maint', 'event'))
-def GetPthreadWorkqueueDetail(wq):
-       format = "  {0: <22s} {1: <5d} {2: <5d} {3: <5d} {4: <5d} {5: <5d} {6: <5d} {7: <5d}"
-       # requests
-       schedstr = format.format('scheduled', wq.wq_thscheduled_count[0], wq.wq_thscheduled_count[1], wq.wq_thscheduled_count[2], wq.wq_thscheduled_count[3], wq.wq_thscheduled_count[4], wq.wq_thscheduled_count[5], wq.wq_thscheduled_count[6])
-       activestr = format.format('active', wq.wq_thactive_count[0], wq.wq_thactive_count[1], wq.wq_thactive_count[2], wq.wq_thactive_count[3], wq.wq_thactive_count[4], wq.wq_thactive_count[5], wq.wq_thactive_count[6])
-       return "\n".join([schedstr, activestr])
-
-@lldb_command('showpthreadstate')
-def PthreadCurrentMutex(cmd_args=None):
-       """
-       display information about a thread's pthread state
-       Syntax: (lldb) showpthreadstate <thread_t>
-       """
-       if not cmd_args:
-               raise ArgumentError("No arguments passed")
-
-       thread = kern.GetValueFromAddress(cmd_args[0], "thread_t")
-       print GetPthreadSummary.header
-       print GetPthreadSummary(thread)
-
-       uthread = Cast(thread.uthread, "uthread_t")
-       kwe = addressof(uthread.uu_kevent.uu_kwe)
-       print GetKweSummary.header
-       print GetKweSummary(kwe)
-
-@lldb_command('showpthreadworkqueue')
-def ShowPthreadWorkqueue(cmd_args=None):
-       """
-       display information about a processes' pthread workqueue
-       Syntax: (lldb) showpthreadworkqueue <proc_t>
-       """
-       
-       if not cmd_args:
-               raise ArgumentError("No arguments passed")
-               
-       proc = kern.GetValueFromAddress(cmd_args[0], "proc_t")
-       wq = Cast(proc.p_wqptr, "struct workqueue *");
-       
-       print GetPthreadWorkqueueSummary.header
-       print GetPthreadWorkqueueSummary(wq)
-       
-       print GetPthreadWorkqueueDetail.header
-       print GetPthreadWorkqueueDetail(wq)
-
-def __lldb_init_module(debugger, internal_dict):
-       pass
diff --git a/man/pthread_mutexattr.3 b/man/pthread_mutexattr.3
index 13e0861e834db636571c007cada52badbc9512dd..756c4079206528fcbcd42e01c17e72f53e3fa6d3 100644 (file)
 .Fn pthread_mutexattr_settype "pthread_mutexattr_t *attr" "int type"
 .Ft int
 .Fn pthread_mutexattr_gettype "pthread_mutexattr_t *attr" "int *type"
+.Ft int
+.Fn pthread_mutexattr_setpolicy_np "pthread_mutexattr_t *attr" "int policy"
+.Ft int
+.Fn pthread_mutexattr_getpolicy_np "pthread_mutexattr_t *attr" "int *policy"
 .Sh DESCRIPTION
 Mutex attributes are used to specify parameters to
 .Fn pthread_mutex_init .
@@ -164,6 +168,31 @@ This is the default mutex type for
 functions copy the type value of the attribute to the location pointed to by the second parameter.
 .Pp
 The
+.Fn pthread_mutexattr_setpolicy_np
+function sets the mutex
+.Fa policy
+value of the attribute.
+Valid mutex policies are:
+.Bl -tag -width "XXX" -offset 2n
+.It Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP
+The first-fit mutex policy allows acquisition of the mutex to occur in any
+order. This policy is similar in operation to os_unfair_lock; new contending
+acquirers may obtain ownership of the mutex ahead of existing waiters.
+.It Dv PTHREAD_MUTEX_POLICY_FAIRSHARE_NP
+The fairshare mutex policy guarantees that ownership of a contended mutex will
+be granted to waiters on a strictly ordered first-in, first-out basis. That is,
+a mutex holder that unlocks the mutex and then attempts to relock will wait
+behind existing threads already waiting on the mutex before being granted
+ownership again.
+.El
+.Pp
+The
+.Fn pthread_mutexattr_getpolicy_np
+function copies the mutex
+.Fa policy
+value of the attribute to the location pointed to by the second parameter.
+.Pp
+The
 .Fn pthread_mutexattr_set*
 functions set the attribute that corresponds to each function name.
 .Pp
@@ -174,6 +203,39 @@ to the location pointed to by the second function parameter.
 .Sh RETURN VALUES
 If successful, these functions return 0.
 Otherwise, an error number is returned to indicate the error.
+.Sh ENVIRONMENT
+The following environment variables change the behavior of the pthread mutex
+implementation.
+.Bl -tag -width "XXX" -offset 2n
+.It Ev PTHREAD_MUTEX_DEFAULT_POLICY
+Controls the process-wide policy used when initializing a pthread_mutex_t that
+has not had a policy set via
+.Fn pthread_mutexattr_setpolicy_np .
+The valid values are mapped as:
+.Pp
+.Bl -tag -width "XXX"
+.It Fa 1
+.Dv PTHREAD_MUTEX_POLICY_FAIRSHARE_NP
+.It Fa 3
+.Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP
+.El
+.El
+.Sh BACKWARDS COMPATIBILITY
+Prior to macOS 10.14 (iOS and tvOS 12.0, watchOS 5.0) the only available
+pthread mutex policy mode was
+.Dv PTHREAD_MUTEX_POLICY_FAIRSHARE_NP .
+macOS 10.14 (iOS and tvOS 12.0, watchOS 5.0) introduces
+.Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP
+and also makes this the default mode for mutexes initialized without a policy
+attribute set.
+.Pp
+Attempting to use
+.Fn pthread_mutexattr_setpolicy_np
+to set the policy of a pthread_mutex_t to
+.Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP
+on earlier releases will fail with
+.Er EINVAL
+and the mutex will continue to operate in fairshare mode.
 .Sh ERRORS
 The
 .Fn pthread_mutexattr_init
@@ -252,6 +314,27 @@ function will fail if:
 Invalid value for
 .Fa attr .
 .El
+.Pp
+The
+.Fn pthread_mutexattr_setpolicy_np
+function will fail if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Invalid value for
+.Fa attr .
+.El
+.Pp
+The
+.Fn pthread_mutexattr_getpolicy_np
+function will fail if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The value specified either by
+.Fa policy
+or
+.Fa attr
+is invalid.
+.El
 .Sh SEE ALSO
 .Xr pthread_mutex_init 3
 .Sh STANDARDS
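Taken together, the attribute calls documented in this man page compose as in
the following minimal C sketch (error handling elided for brevity):

    #include <pthread.h>

    static pthread_mutex_t m;

    static void
    init_firstfit_mutex(void)
    {
        pthread_mutexattr_t attr;
        int policy = 0;

        pthread_mutexattr_init(&attr);
        /* Opt in to first-fit ordering (the default since macOS 10.14). */
        pthread_mutexattr_setpolicy_np(&attr, PTHREAD_MUTEX_POLICY_FIRSTFIT_NP);
        pthread_mutexattr_getpolicy_np(&attr, &policy);
        /* policy now reads back PTHREAD_MUTEX_POLICY_FIRSTFIT_NP */
        pthread_mutex_init(&m, &attr);
        pthread_mutexattr_destroy(&attr);
    }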
diff --git a/private/dependency_private.h b/private/dependency_private.h
new file mode 100644 (file)
index 0000000..77d209f
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef __PTHREAD_DEPENDENCY_PRIVATE__
+#define __PTHREAD_DEPENDENCY_PRIVATE__
+
+#include <os/base.h>
+#include <sys/cdefs.h>
+#include <pthread/pthread.h>
+#include <Availability.h>
+
+__BEGIN_DECLS
+
+OS_ASSUME_NONNULL_BEGIN
+
+/*!
+ * @typedef pthread_dependency_t
+ *
+ * @abstract
+ * A pthread dependency is a one-time dependency between a thread producing
+ * a value and a waiter thread, expressed to the system in a way
+ * that priority inversion avoidance can be applied if necessary.
+ *
+ * @discussion
+ * These tokens are one-time use, and meant to be on the stack of the waiter
+ * thread.
+ *
+ * These tokens must be both fulfilled and waited on, exactly one of each.
+ */
+typedef struct pthread_dependency_s {
+       uint32_t __pdep_owner;
+       uint32_t __pdep_opaque1;
+       uint64_t __pdep_opaque2;
+} pthread_dependency_t;
+
+/*!
+ * @typedef pthread_dependency_attr_t
+ *
+ * @abstract
+ * An opaque type to allow for future expansion of the pthread_dependency
+ * interface.
+ */
+typedef struct pthread_dependency_attr_s pthread_dependency_attr_t;
+
+#if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)) || defined(_DARWIN_C_SOURCE) || defined(__cplusplus)
+/*!
+ * @macro PTHREAD_DEPENDENCY_INITIALIZER_NP
+ *
+ * @abstract
+ * Initialize a one-time dependency token.
+ *
+ * @param __pthread
+ * The thread that will be waited on for this dependency to be fulfilled.
+ * It is expected that this thread will call pthread_dependency_fulfill_np().
+ */
+#define PTHREAD_DEPENDENCY_INITIALIZER_NP(__pthread) \
+               { pthread_mach_thread_np(__pthread), 0, 0 }
+#endif
+
+/*!
+ * @function pthread_dependency_init_np
+ *
+ * @abstract
+ * Initialize a dependency token.
+ *
+ * @param __dependency
+ * A pointer to a dependency token to initialize.
+ *
+ * @param __pthread
+ * The thread that will be waited on for this dependency to be fulfilled.
+ * It is expected that this thread will call pthread_dependency_fulfill_np().
+ *
+ * @param __attrs
+ * This argument is reserved for future expansion purposes, and NULL should be
+ * passed.
+ */
+__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+OS_NONNULL1 OS_NONNULL2 OS_NOTHROW
+void pthread_dependency_init_np(pthread_dependency_t *__dependency,
+               pthread_t __pthread, pthread_dependency_attr_t *_Nullable __attrs);
+
+/*!
+ * @function pthread_dependency_fulfill_np
+ *
+ * @abstract
+ * Fulfill a dependency.
+ *
+ * @discussion
+ * Calling pthread_dependency_fulfill_np() with a token that hasn't been
+ * initialized yet, or calling pthread_dependency_fulfill_np() on the same
+ * dependency token more than once is undefined and will cause the process
+ * to be terminated.
+ *
+ * The thread that calls pthread_dependency_fulfill_np() must be the same
+ * as the pthread_t that was specified when initializing the token. Not doing so
+ * is undefined and will cause the process to be terminated.
+ *
+ * @param __dependency
+ * A pointer to a dependency token that was previously initialized.
+ *
+ * @param __value
+ * An optional value that can be returned through the dependency token
+ * to the waiter.
+ */
+__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+OS_NONNULL1 OS_NOTHROW
+void pthread_dependency_fulfill_np(pthread_dependency_t *__dependency,
+               void * _Nullable __value);
+
+/*!
+ * @function pthread_dependency_wait_np
+ *
+ * @abstract
+ * Wait on a dependency.
+ *
+ * @discussion
+ * Calling pthread_dependency_wait_np() with a token that hasn't been
+ * initialized yet, or calling pthread_dependency_wait_np() on the same
+ * dependency token more than once is undefined and will cause the process
+ * to be terminated.
+ *
+ * If the dependency is not fulfilled yet when this function is called, priority
+ * inversion avoidance will be applied to the thread that was specified when
+ * initializing the token, to ensure that it can call
+ * pthread_dependency_fulfill_np() without causing a priority inversion for the
+ * thread calling pthread_dependency_wait_np().
+ *
+ * @param __dependency
+ * A pointer to a dependency token that was previously initialized with
+ * PTHREAD_DEPENDENCY_INITIALIZER_NP() or pthread_dependency_init_np().
+ *
+ * @returns
+ * The value that was passed to pthread_dependency_fulfill_np() as the `__value`
+ * argument.
+ */
+__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+OS_NONNULL1 OS_NOTHROW
+void *_Nullable pthread_dependency_wait_np(pthread_dependency_t *__dependency);
+
+OS_ASSUME_NONNULL_END
+
+__END_DECLS
+
+#endif //__PTHREAD_DEPENDENCY_PRIVATE__
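As a sketch of the intended one-fulfill/one-wait protocol described above
(this is private SPI, and hand_off_to() is a hypothetical helper standing in
for whatever mechanism makes the token visible to the producer thread):

    #include <pthread.h>
    #include <pthread/dependency_private.h>

    struct request {
        pthread_dependency_t dep;
        /* ... request payload ... */
    };

    void hand_off_to(pthread_t producer, struct request *req); /* hypothetical */

    /* Waiter side: the token lives on this thread's stack and is waited on
     * exactly once; the return value is whatever the producer fulfilled. */
    static void *
    wait_for_result(pthread_t producer)
    {
        struct request req;
        pthread_dependency_init_np(&req.dep, producer, NULL);
        hand_off_to(producer, &req);
        return pthread_dependency_wait_np(&req.dep);
    }

    /* Producer side: must run on the pthread_t named at init time and must
     * fulfill the token exactly once. */
    static void
    fulfill_result(struct request *req, void *result)
    {
        pthread_dependency_fulfill_np(&req->dep, result);
    }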
diff --git a/private/private.h b/private/private.h
index b98a350e0f28f0aa3f2894242634c5202cec5c99..b321442be2f0e9fe0f65c0a3d8c12ab6b6e32762 100644 (file)
@@ -93,6 +93,8 @@ int pthread_chdir_np(char *path);
 __API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0))
 int pthread_fchdir_np(int fd);
 
+__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+int pthread_attr_setcpupercent_np(pthread_attr_t * __restrict, int, unsigned long);
 
 #ifdef _os_tsd_get_base
 
@@ -107,17 +109,17 @@ __header_always_inline uint64_t
 _pthread_threadid_self_np_direct(void)
 {
 #ifndef __i386__
-    if (_pthread_has_direct_tsd()) {
+       if (_pthread_has_direct_tsd()) {
 #ifdef OS_GS_RELATIVE
-        return *(uint64_t OS_GS_RELATIVE *)(_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET);
+               return *(uint64_t OS_GS_RELATIVE *)(_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET);
 #else
-        return *(uint64_t*)((char *)_os_tsd_get_base() + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET);
+               return *(uint64_t*)((char *)_os_tsd_get_base() + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET);
 #endif
-    }
+       }
 #endif
-    uint64_t threadid = 0;
-    pthread_threadid_np(NULL, &threadid);
-    return threadid;
+       uint64_t threadid = 0;
+       pthread_threadid_np(NULL, &threadid);
+       return threadid;
 }
 
 #endif // _os_tsd_get_base
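The fallback path above is also the portable way to fetch the identifier; a
minimal sketch:

    #include <pthread.h>
    #include <stdint.h>

    static uint64_t
    current_tid(void)
    {
        uint64_t tid = 0;
        pthread_threadid_np(NULL, &tid); /* NULL means the calling thread */
        return tid;
    }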
diff --git a/private/qos_private.h b/private/qos_private.h
index 50f273a0de43877198a686bbb2c5d20002003020..6068a822c068662ccbf438a6d77a81a1145e9821 100644 (file)
@@ -25,6 +25,7 @@
 #define _QOS_PRIVATE_H
 
 #include <pthread/qos.h>
+#include <pthread/priority_private.h>
 #include <sys/qos.h> /* qos_class_t */
 #include <sys/qos_private.h>
 
 #include <mach/port.h>
 #endif
 
-// pthread_priority_t is an on opaque integer that is guaranteed to be ordered such that
-// combations of QoS classes and relative priorities are ordered numerically, according to
-// their combined priority.
-typedef unsigned long pthread_priority_t;
-
-// masks for splitting the handling the contents of a pthread_priority_t, the mapping from
-// qos_class_t to the class bits, however, is intentionally not exposed.
-#define _PTHREAD_PRIORITY_FLAGS_MASK                   0xff000000
-#define _PTHREAD_PRIORITY_FLAGS_SHIFT                  (24ull)
-#define _PTHREAD_PRIORITY_ENCODING_MASK                        0x00a00000
-#define _PTHREAD_PRIORITY_ENCODING_SHIFT               (22ull)
-#define _PTHREAD_PRIORITY_ENCODING_V0                  0x00000000
-#define _PTHREAD_PRIORITY_ENCODING_V1                  0x00400000 /* unused */
-#define _PTHREAD_PRIORITY_ENCODING_V2                  0x00800000 /* unused */
-#define _PTHREAD_PRIORITY_ENCODING_V3                  0x00a00000 /* unused */
-#define _PTHREAD_PRIORITY_QOS_CLASS_MASK               0x003fff00
-#define _PTHREAD_PRIORITY_QOS_CLASS_SHIFT              (8ull)
-#define _PTHREAD_PRIORITY_PRIORITY_MASK                        0x000000ff
-#define _PTHREAD_PRIORITY_PRIORITY_SHIFT               (0)
-
-#define _PTHREAD_PRIORITY_OVERCOMMIT_FLAG              0x80000000
-#define _PTHREAD_PRIORITY_INHERIT_FLAG                 0x40000000
-#define _PTHREAD_PRIORITY_ROOTQUEUE_FLAG               0x20000000
-// Used to indicate to the pthread kext that the provided event manager thread
-// priority is actually a scheduling priority not a QoS.  We can have ROOTQUEUE_FLAG
-// perform double duty because it's never provided to the kernel.
-#define _PTHREAD_PRIORITY_SCHED_PRI_FLAG               0x20000000
-#define _PTHREAD_PRIORITY_SCHED_PRI_MASK               0x0000ffff
-#define _PTHREAD_PRIORITY_ENFORCE_FLAG                 0x10000000
-#define _PTHREAD_PRIORITY_OVERRIDE_FLAG                        0x08000000
-
-// libdispatch defines the following, so it's not safe to use for anything we
-// expect to be passed in from userspace
-#define _PTHREAD_PRIORITY_DEFAULTQUEUE_FLAG            0x04000000
-
-// The event manager flag indicates that this thread/request is for a event
-// manager thread.  There can only ever be one event manager thread at a time and
-// it is brought up at the highest of all event manager priorities passed to the
-// kext.
-#define _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG   0x02000000
-#define _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG            0x01000000
-
 // redeffed here to avoid leaving __QOS_ENUM defined in the public header
 #define __QOS_ENUM(name, type, ...) enum { __VA_ARGS__ }; typedef type name##_t
 #define __QOS_AVAILABLE_10_10
diff --git a/private/tsd_private.h b/private/tsd_private.h
index f91c1f6cf206875ceffa419853a2f82c2490c05e..f9260fb01b9f697e1bafcea12713c5dfb9a46f87 100644 (file)
 #define __TSD_RETURN_TO_KERNEL 5
 #endif
 
+#ifndef __TSD_PTR_MUNGE
+#define __TSD_PTR_MUNGE 7
+#endif
+
 #ifndef __TSD_MACH_SPECIAL_REPLY
 #define __TSD_MACH_SPECIAL_REPLY 8
 #endif
@@ -81,6 +85,7 @@
 #define _PTHREAD_TSD_SLOT_MACH_THREAD_SELF __TSD_MACH_THREAD_SELF
 #define _PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS    __TSD_THREAD_QOS_CLASS
 #define _PTHREAD_TSD_SLOT_RETURN_TO_KERNEL __TSD_RETURN_TO_KERNEL
+#define _PTHREAD_TSD_SLOT_PTR_MUNGE __TSD_PTR_MUNGE
 #define _PTHREAD_TSD_SLOT_MACH_SPECIAL_REPLY __TSD_MACH_SPECIAL_REPLY
 //#define _PTHREAD_TSD_SLOT_SEMAPHORE_CACHE __TSD_SEMAPHORE_CACHE
 
diff --git a/private/workqueue_private.h b/private/workqueue_private.h
index 0b0a001e9bbeb51de9f45492b4f0ecc984f943a6..9cd0e951d6df271b6fbb472a48d552a7b3572436 100644 (file)
@@ -179,6 +179,14 @@ __API_AVAILABLE(macos(10.10.2))
 int
 _pthread_workqueue_asynchronous_override_reset_all_self(void);
 
+__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+int
+_pthread_workloop_create(uint64_t workloop_id, uint64_t options, pthread_attr_t *attr);
+
+__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0))
+int
+_pthread_workloop_destroy(uint64_t workloop_id);
+
 __END_DECLS
 
 #endif // __PTHREAD_WORKQUEUE_H__
diff --git a/pthread/introspection.h b/pthread/introspection.h
index 18292085d5c1ce4c7a5611c338eeffd7ec86acbf..10b719a3da20ce9fc4f51a014fa243287e2a2609 100644 (file)
@@ -64,18 +64,40 @@ typedef void (*pthread_introspection_hook_t)(unsigned int event,
 
 /*!
  * @enum pthread_introspection_event_t
+ * Events sent by libpthread about thread lifetimes.
  *
- * @constant PTHREAD_INTROSPECTION_THREAD_CREATE
- * pthread_t was created.
+ * @const PTHREAD_INTROSPECTION_THREAD_CREATE
+ * The specified pthread_t was created, and there will be a paired
+ * PTHREAD_INTROSPECTION_THREAD_DESTROY event. However, there may not be
+ * a START/TERMINATE pair of events for this pthread_t.
  *
- * @constant PTHREAD_INTROSPECTION_THREAD_START
- * Thread has started and stack was allocated.
+ * Starting with macOS 10.14, and iOS 12, this event is always sent before
+ * PTHREAD_INTROSPECTION_THREAD_START is sent. This event is however not sent
+ * for the main thread.
  *
- * @constant PTHREAD_INTROSPECTION_THREAD_TERMINATE
- * Thread is about to be terminated and stack will be deallocated.
+ * This event may not be sent from the context of the passed in pthread_t.
  *
- * @constant PTHREAD_INTROSPECTION_THREAD_DESTROY
- * pthread_t is about to be destroyed.
+ * Note that not all properties of this thread may be functional yet, and it is
+ * not permitted to call functions on this thread beyond observing its address.
+ *
+ * @const PTHREAD_INTROSPECTION_THREAD_START
+ * Thread has started and its stack was allocated. There will be a matching
+ * PTHREAD_INTROSPECTION_THREAD_TERMINATE event.
+ *
+ * This event is always sent from the context of the passed in pthread_t.
+ *
+ * @const PTHREAD_INTROSPECTION_THREAD_TERMINATE
+ * Thread is about to be terminated and stack will be deallocated. This always
+ * matches a PTHREAD_INTROSPECTION_THREAD_START event.
+ *
+ * This event is always sent from the context of the passed in pthread_t.
+ *
+ * @const PTHREAD_INTROSPECTION_THREAD_DESTROY
+ * pthread_t is about to be destroyed. This always matches
+ * a PTHREAD_INTROSPECTION_THREAD_CREATE event, but there may not have been
+ * a START/TERMINATE pair of events for this pthread_t.
+ *
+ * This event may not be sent from the context of the passed in pthread_t.
  */
 enum {
        PTHREAD_INTROSPECTION_THREAD_CREATE = 1,
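A minimal hook illustrating the CREATE caveats above; this sketch assumes the
hook signature shown earlier in this header and the
pthread_introspection_hook_install() installer it declares:

    #include <pthread/introspection.h>
    #include <stdio.h>

    static pthread_introspection_hook_t prev_hook;

    static void
    lifecycle_hook(unsigned int event, pthread_t thread, void *addr, size_t size)
    {
        if (event == PTHREAD_INTROSPECTION_THREAD_CREATE) {
            /* CREATE may fire on another thread; only observe the address. */
            fprintf(stderr, "thread %p created\n", (void *)thread);
        }
        if (prev_hook) prev_hook(event, thread, addr, size);
    }

    static void
    install_hook(void)
    {
        prev_hook = pthread_introspection_hook_install(lifecycle_hook);
    }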
diff --git a/pthread/pthread.h b/pthread/pthread.h
index 0e2ecb73b9a4baf217a471803cd9ef93f6e27c9a..f5fdff6b7e126f635b7204b0779a15be08c320c3 100644 (file)
@@ -171,6 +171,12 @@ __BEGIN_DECLS
 #define PTHREAD_MUTEX_RECURSIVE                2
 #define PTHREAD_MUTEX_DEFAULT          PTHREAD_MUTEX_NORMAL
 
+/*
+ * Mutex policy attributes
+ */
+#define PTHREAD_MUTEX_POLICY_FAIRSHARE_NP   1
+#define PTHREAD_MUTEX_POLICY_FIRSTFIT_NP    3
+
 /*
  * RWLock variables
  */
@@ -405,6 +411,10 @@ __API_AVAILABLE(macos(10.4), ios(2.0))
 int pthread_mutexattr_gettype(const pthread_mutexattr_t * __restrict,
                int * __restrict);
 
+__API_AVAILABLE(macos(10.13.4), ios(11.3), watchos(4.3), tvos(11.3))
+int pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t * __restrict,
+               int * __restrict);
+
 __API_AVAILABLE(macos(10.4), ios(2.0))
 int pthread_mutexattr_init(pthread_mutexattr_t *);
 
@@ -420,6 +430,9 @@ int pthread_mutexattr_setpshared(pthread_mutexattr_t *, int);
 __API_AVAILABLE(macos(10.4), ios(2.0))
 int pthread_mutexattr_settype(pthread_mutexattr_t *, int);
 
+__API_AVAILABLE(macos(10.7), ios(5.0))
+int pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *, int);
+
 __SWIFT_UNAVAILABLE_MSG("Use lazily initialized globals instead")
 __API_AVAILABLE(macos(10.4), ios(2.0))
 int pthread_once(pthread_once_t *, void (* _Nonnull)(void));
diff --git a/pthread/pthread_spis.h b/pthread/pthread_spis.h
index a0ba75430641bb3f5e143aa7470a0e496966640d..91fb6419fa42e6734f1aaf13b13fd4f0ce3f6c21 100644 (file)
@@ -63,19 +63,13 @@ __BEGIN_DECLS
 #if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)) || defined(_DARWIN_C_SOURCE)
 /* firstfit */
 #define PTHREAD_FIRSTFIT_MUTEX_INITIALIZER {_PTHREAD_FIRSTFIT_MUTEX_SIG_init, {0}}
+
 /*
  * Mutex attributes
  */
-#define _PTHREAD_MUTEX_POLICY_NONE             0
-#define _PTHREAD_MUTEX_POLICY_FAIRSHARE                1
-#define _PTHREAD_MUTEX_POLICY_FIRSTFIT         2
-
-/* manipulate the mutex policy attributes */
-__API_AVAILABLE(macos(10.7), ios(5.0))
-int pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *, int );
-
-__API_AVAILABLE(macos(10.13.4), ios(11.3))
-int pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t *, int * );
+#define _PTHREAD_MUTEX_POLICY_NONE                     PTHREAD_MUTEX_POLICY_NONE
+#define _PTHREAD_MUTEX_POLICY_FAIRSHARE                PTHREAD_MUTEX_POLICY_FAIRSHARE_NP
+#define _PTHREAD_MUTEX_POLICY_FIRSTFIT         PTHREAD_MUTEX_POLICY_FIRSTFIT_NP
 
 #endif /* (!_POSIX_C_SOURCE && !_XOPEN_SOURCE) || _DARWIN_C_SOURCE */
 
diff --git a/pthread/stack_np.h b/pthread/stack_np.h
new file mode 100644 (file)
index 0000000..9b5f513
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_START@
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_END@
+ */
+
+#ifndef __PTHREAD_STACK_NP__
+#define __PTHREAD_STACK_NP__
+
+#include <Availability.h>
+#include <sys/cdefs.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <os/base.h>
+
+OS_ASSUME_NONNULL_BEGIN
+
+/*! @header
+ * Low-level API to introspect thread stacks.
+ */
+
+__BEGIN_DECLS
+
+/*!
+ * @function pthread_stack_frame_decode_np
+ *
+ * @abstract
+ * Decodes the return address and the next stack frame address
+ * from the given stack frame address.
+ *
+ * @discussion
+ * Validation of the frame address is not performed by this function.
+ * The caller is responsible for making sure the frame address is valid,
+ * for example using pthread_get_stackaddr_np() and pthread_get_stacksize_np().
+ *
+ * @param frame_addr
+ * A valid stack frame address such as __builtin_frame_address(0) or the return
+ * value of a previous call to pthread_stack_frame_decode_np().
+ *
+ * @param return_addr
+ * An optional out parameter that will be filled with the return address stored
+ * at the specified stack frame.
+ *
+ * @returns
+ * This returns the next frame address stored at the specified stack frame.
+ */
+__OSX_AVAILABLE(10.14) __IOS_AVAILABLE(12.0)
+__TVOS_AVAILABLE(12.0) __WATCHOS_AVAILABLE(5.0)
+uintptr_t
+pthread_stack_frame_decode_np(uintptr_t frame_addr,
+               uintptr_t *_Nullable return_addr);
+
+__END_DECLS
+
+OS_ASSUME_NONNULL_END
+
+#endif // __PTHREAD_STACK_NP__
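A sketch of walking the calling thread's own frames with this API, using the
stack-bounds functions suggested in the discussion above for validation:

    #include <pthread.h>
    #include <pthread/stack_np.h>
    #include <stdio.h>

    static void
    dump_backtrace(void)
    {
        pthread_t self = pthread_self();
        uintptr_t top = (uintptr_t)pthread_get_stackaddr_np(self);
        uintptr_t bottom = top - pthread_get_stacksize_np(self);
        uintptr_t frame = (uintptr_t)__builtin_frame_address(0);

        while (frame > bottom && frame < top) {
            uintptr_t ret;
            frame = pthread_stack_frame_decode_np(frame, &ret);
            if (!ret) break;
            printf("return address: %p\n", (void *)ret);
        }
    }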
diff --git a/src/internal.h b/src/internal.h
index 9f2e127f37789a1e3c1e48ca3529bed25802476c..c9c16c7ae66d9e1733e1e5b1ea7e4095a2bc2c16 100644 (file)
@@ -70,6 +70,8 @@ typedef struct _pthread_attr_t pthread_attr_t;
 #include <mach/mach.h>
 #include <mach/mach_error.h>
 #include <sys/queue.h>
+#include <pthread/bsdthread_private.h>
+#include <pthread/workqueue_syscalls.h>
 
 #define __OS_EXPOSE_INTERNALS__ 1
 #include <os/internal/internal_shared.h>
@@ -125,19 +127,24 @@ typedef os_unfair_lock _pthread_lock;
 #define _PTHREAD_UNLOCK(lock) os_unfair_lock_unlock_inline(&(lock))
 #define _PTHREAD_UNLOCK_FROM_MACH_THREAD(lock) os_unfair_lock_unlock_inline_no_tsd_4libpthread(&(lock))
 
+#define _PTHREAD_POLICY_IS_FIXEDPRI(x) ((x) == SCHED_RR || (x) == SCHED_FIFO)
+
+extern int __is_threaded;
+extern int __unix_conforming;
+
 // List of all pthreads in the process.
 TAILQ_HEAD(__pthread_list, _pthread);
-extern struct __pthread_list __pthread_head;
+PTHREAD_NOEXPORT extern struct __pthread_list __pthread_head;
 
 // Lock protects access to above list.
-extern _pthread_lock _pthread_list_lock;
+PTHREAD_NOEXPORT extern _pthread_lock _pthread_list_lock;
 
-extern int __is_threaded;
+PTHREAD_NOEXPORT extern uint32_t _main_qos;
 
 #if PTHREAD_DEBUG_LOG
 #include <mach/mach_time.h>
-extern int _pthread_debuglog;
-extern uint64_t _pthread_debugstart;
+PTHREAD_NOEXPORT extern int _pthread_debuglog;
+PTHREAD_NOEXPORT extern uint64_t _pthread_debugstart;
 #endif
 
 /*
@@ -153,6 +160,8 @@ extern uint64_t _pthread_debugstart;
 #define _INTERNAL_POSIX_THREAD_KEYS_END 768
 #endif
 
+#define PTHREAD_T_OFFSET 0
+
 #define MAXTHREADNAMESIZE      64
 #define _PTHREAD_T
 typedef struct _pthread {
@@ -165,52 +174,56 @@ typedef struct _pthread {
        //
        // SPI - These fields are private.
        //
-       // these fields are globally protected by _pthread_list_lock:
-       uint32_t childrun:1,
-                       parentcheck:1,
-                       childexit:1,
-                       pad3:29;
-
-       _pthread_lock lock; // protect access to everything below
-       uint32_t detached:8,
-                       inherit:8,
-                       policy:8,
-                       kernalloc:1,
-                       schedset:1,
-                       wqthread:1,
-                       wqkillset:1,
-                       pad:4;
-
-#if defined(__LP64__)
-       uint32_t pad0;
-#endif
-
-       void *(*fun)(void*);    // thread start routine
-       void *arg;              // thread start routine argument
-       void *exit_value;       // thread exit value storage
-
-       semaphore_t joiner_notify;      // pthread_join notification
-
-       int max_tsd_key;
-       int cancel_state;       // whether the thread can be cancelled
-       int cancel_error;
 
-       int err_no;             // thread-local errno
+       //
+       // Fields protected by _pthread_list_lock
+       //
 
-       struct _pthread *joiner;
+       TAILQ_ENTRY(_pthread) tl_plist; // global thread list [aligned]
+       struct pthread_join_context_s *tl_join_ctx;
+       void *tl_exit_value;
+       uint32_t tl_policy:8,
+                       tl_joinable:1,
+                       tl_joiner_cleans_up:1,
+                       tl_has_custom_stack:1,
+                       __tl_pad:21;
+       // MACH_PORT_NULL if no joiner
+       // tsd[_PTHREAD_TSD_SLOT_MACH_THREAD_SELF] when has a joiner
+       // MACH_PORT_DEAD if the thread exited
+       uint32_t tl_exit_gate;
+       struct sched_param tl_param;
 
-       struct sched_param param;       // [aligned]
+       //
+       // Fields protected by pthread_t::lock
+       //
 
-       TAILQ_ENTRY(_pthread) plist;    // global thread list [aligned]
+       _pthread_lock lock;
+       uint16_t max_tsd_key;
+       uint16_t inherit:8,
+                       kernalloc:1,
+                       schedset:1,
+                       wqthread:1,
+                       wqkillset:1,
+                       wqoutsideqos:1,
+                       __flags_pad:3;
 
        char pthread_name[MAXTHREADNAMESIZE];   // includes NUL [aligned]
 
-       void *stackaddr;        // base of the stack (page aligned)
-       size_t stacksize;       // size of stack (page multiple and >= PTHREAD_STACK_MIN)
-
-       void* freeaddr;         // stack/thread allocation base address
-       size_t freesize;        // stack/thread allocation size
-       size_t guardsize;       // guard page size in bytes
+       void *(*fun)(void *);   // thread start routine
+       void *wq_kqid_ptr;              // wqthreads (workloop)
+       void *arg;                              // thread start routine argument
+       int   wq_nevents;               // wqthreads (workloop / kevent)
+       uint16_t wq_retop;              // wqthreads
+       uint8_t cancel_state;   // whether the thread can be canceled [atomic]
+       uint8_t canceled;               // 4597450 set if conformant cancelation happened
+       errno_t cancel_error;
+       errno_t err_no;                 // thread-local errno
+
+       void *stackaddr;                // base of the stack (page aligned)
+       void *stackbottom;              // stackaddr - stacksize
+       void *freeaddr;                 // stack/thread allocation base address
+       size_t freesize;                // stack/thread allocation size
+       size_t guardsize;               // guard page size in bytes
 
        // tsd-base relative accessed elements
        __attribute__((aligned(8)))
@@ -228,39 +241,39 @@ typedef struct _pthread {
        void *tsd[_EXTERNAL_POSIX_THREAD_KEYS_MAX + _INTERNAL_POSIX_THREAD_KEYS_MAX];
 } *pthread_t;
 
-
+#define _PTHREAD_ATTR_REFILLMS_MAX ((2<<24) - 1)
 struct _pthread_attr_t {
-       long sig;
-       _pthread_lock lock;
-       uint32_t detached:8,
+       long   sig;
+       size_t guardsize; // size in bytes of stack overflow guard area
+       void  *stackaddr; // stack base; vm_page_size aligned
+       size_t stacksize; // stack size; multiple of vm_page_size and >= PTHREAD_STACK_MIN
+       union {
+               struct sched_param param; // [aligned]
+               unsigned long qosclass; // pthread_priority_t
+       };
+       uint32_t
+               detached:8,
                inherit:8,
                policy:8,
-               fastpath:1,
                schedset:1,
                qosset:1,
-               unused:5;
-       struct sched_param param; // [aligned]
-       void *stackaddr; // stack base; vm_page_size aligned
-       size_t stacksize; // stack size; multiple of vm_page_size and >= PTHREAD_STACK_MIN
-       size_t guardsize; // size in bytes of stack overflow guard area
-       unsigned long qosclass;
+               policyset:1,
+               cpupercentset:1,
+               defaultguardpage:1,
+               unused:3;
+       uint32_t
+               cpupercent:8,
+               refillms:24;
 #if defined(__LP64__)
-       uint32_t _reserved[2];
+       uint32_t _reserved[4];
 #else
-       uint32_t _reserved[1];
+       uint32_t _reserved[2];
 #endif
 };
 
 /*
  * Mutex attributes
  */
-#define _PTHREAD_MUTEX_POLICY_NONE             0
-#define _PTHREAD_MUTEX_POLICY_FAIRSHARE                1
-#define _PTHREAD_MUTEX_POLICY_FIRSTFIT         2
-#define _PTHREAD_MUTEX_POLICY_REALTIME         3
-#define _PTHREAD_MUTEX_POLICY_ADAPTIVE         4
-#define _PTHREAD_MUTEX_POLICY_PRIPROTECT       5
-#define _PTHREAD_MUTEX_POLICY_PRIINHERIT       6
 
 #define _PTHREAD_MUTEXATTR_T
 typedef struct {
@@ -269,7 +282,7 @@ typedef struct {
        uint32_t protocol:2,
                type:2,
                pshared:2,
-               policy:3,
+               opt:3,
                unused:23;
 } pthread_mutexattr_t;
 
@@ -285,6 +298,21 @@ struct _pthread_mutex_options {
                unused:2,
                lock_count:16;
 };
+//
+#define _PTHREAD_MUTEX_POLICY_LAST             (PTHREAD_MUTEX_POLICY_FIRSTFIT_NP + 1)
+#define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE 1
+#define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT 2
+#define _PTHREAD_MTX_OPT_POLICY_DEFAULT _PTHREAD_MTX_OPT_POLICY_FIRSTFIT
+// The following _pthread_mutex_options definitions exist in synch_internal.h
+// such that the kernel extension can test for flags. They must be kept in
+// sync with the bit values in the struct above.
+// _PTHREAD_MTX_OPT_PSHARED 0x010
+// _PTHREAD_MTX_OPT_NOTIFY 0x1000
+// _PTHREAD_MTX_OPT_MUTEX 0x2000
+
+// The fixed mask is used to mask out portions of the mutex options that
+// change on a regular basis (notify, lock_count).
+#define _PTHREAD_MTX_OPT_FIXED_MASK    0x27ff
 
 typedef struct {
        long sig;
@@ -429,12 +457,6 @@ _pthread_selfid_direct(void)
 #define _PTHREAD_KERN_MUTEX_SIG                0x34567812  /*  */
 #define _PTHREAD_KERN_RWLOCK_SIG       0x56781234  /*  */
 
-#define _PTHREAD_CREATE_PARENT         4
-#define _PTHREAD_EXITED                        8
-// 4597450: begin
-#define _PTHREAD_WASCANCEL             0x10
-// 4597450: end
-
 #if defined(DEBUG)
 #define _PTHREAD_MUTEX_OWNER_SELF      pthread_self()
 #else
@@ -454,11 +476,6 @@ extern boolean_t swtch_pri(int);
 /* Prototypes. */
 
 /* Internal globals. */
-PTHREAD_NOEXPORT extern int __pthread_supported_features;
-
-/* Functions defined in machine-dependent files. */
-PTHREAD_NOEXPORT void _pthread_setup(pthread_t th, void (*f)(pthread_t), void *sp, int suspended, int needresume);
-
 PTHREAD_NOEXPORT void _pthread_tsd_cleanup(pthread_t self);
 
 PTHREAD_NOEXPORT int _pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t * flagp, uint32_t ** pmtxp, uint32_t * mgenp, uint32_t * ugenp);
@@ -468,8 +485,8 @@ PTHREAD_NOEXPORT void* malloc(size_t);
 PTHREAD_NOEXPORT void free(void*);
 
 /* syscall interfaces */
-extern uint32_t __psynch_mutexwait(pthread_mutex_t * mutex,  uint32_t mgen, uint32_t  ugen, uint64_t tid, uint32_t flags);
-extern uint32_t __psynch_mutexdrop(pthread_mutex_t * mutex,  uint32_t mgen, uint32_t  ugen, uint64_t tid, uint32_t flags);
+extern uint32_t __psynch_mutexwait(_pthread_mutex * mutex,  uint32_t mgen, uint32_t  ugen, uint64_t tid, uint32_t flags);
+extern uint32_t __psynch_mutexdrop(_pthread_mutex * mutex,  uint32_t mgen, uint32_t  ugen, uint64_t tid, uint32_t flags);
 
 extern uint32_t __psynch_cvbroad(pthread_cond_t * cv, uint64_t cvlsgen, uint64_t cvudgen, uint32_t flags, pthread_mutex_t * mutex,  uint64_t mugen, uint64_t tid);
 extern uint32_t __psynch_cvsignal(pthread_cond_t * cv, uint64_t cvlsgen, uint32_t cvugen, int thread_port, pthread_mutex_t * mutex,  uint64_t mugen, uint64_t tid, uint32_t flags);
@@ -489,7 +506,9 @@ PTHREAD_EXTERN
 int
 __proc_info(int callnum, int pid, int flavor, uint64_t arg, void * buffer, int buffersize);
 
-PTHREAD_NOEXPORT int _pthread_join_cleanup(pthread_t thread, void ** value_ptr, int conforming);
+PTHREAD_NOEXPORT
+void
+_pthread_deallocate(pthread_t t, bool from_mach_thread);
 
 PTHREAD_NORETURN PTHREAD_NOEXPORT
 void
@@ -499,6 +518,10 @@ PTHREAD_NORETURN PTHREAD_NOEXPORT
 void
 __pthread_abort_reason(const char *fmt, ...) __printflike(1,2);
 
+PTHREAD_NOEXPORT
+thread_qos_t
+_pthread_qos_class_to_thread_qos(qos_class_t qos);
+
 PTHREAD_NOEXPORT
 void
 _pthread_set_main_qos(pthread_priority_t qos);
@@ -515,7 +538,7 @@ PTHREAD_EXPORT
 void
 _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
 
-PTHREAD_EXPORT
+PTHREAD_NORETURN PTHREAD_EXPORT
 void
 _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int flags, int nkevents);
 
@@ -531,9 +554,13 @@ PTHREAD_NOEXPORT_VARIANT
 void
 _pthread_clear_qos_tsd(mach_port_t thread_port);
 
+#define PTHREAD_CONFORM_DARWIN_LEGACY     0
+#define PTHREAD_CONFORM_UNIX03_NOCANCEL   1
+#define PTHREAD_CONFORM_UNIX03_CANCELABLE 2
+
 PTHREAD_NOEXPORT_VARIANT
 void
-_pthread_testcancel(pthread_t thread, int isconforming);
+_pthread_testcancel(int conforming);
 
 PTHREAD_EXPORT
 void
@@ -545,11 +572,11 @@ _pthread_markcancel_if_canceled(pthread_t thread, mach_port_t kport);
 
 PTHREAD_NOEXPORT
 void
-_pthread_setcancelstate_exit(pthread_t self, void *value_ptr, int conforming);
+_pthread_setcancelstate_exit(pthread_t self, void *value_ptr);
 
 PTHREAD_NOEXPORT
-void *
-_pthread_get_exit_value(pthread_t t, int conforming);
+semaphore_t
+_pthread_joiner_prepost_wake(pthread_t thread);
 
 PTHREAD_ALWAYS_INLINE
 static inline mach_port_t
@@ -647,60 +674,54 @@ _pthread_rwlock_check_signature_init(_pthread_rwlock *rwlock)
        return (rwlock->sig == _PTHREAD_RWLOCK_SIG_init);
 }
 
-/* ALWAYS called with list lock and return with list lock */
+/*
+ * ALWAYS called without the list lock and returns with the list lock held on success
+ *
+ * This weird calling convention exists because this function will sometimes
+ * drop the lock, and it's best callers don't have to remember this.
+ */
 PTHREAD_ALWAYS_INLINE
 static inline bool
-_pthread_is_valid_locked(pthread_t thread)
+_pthread_validate_thread_and_list_lock(pthread_t thread)
 {
        pthread_t p;
+       if (thread == NULL) return false;
 loop:
-       TAILQ_FOREACH(p, &__pthread_head, plist) {
-               if (p == thread) {
-                       int state = os_atomic_load(&p->cancel_state, relaxed);
-                       if (state & _PTHREAD_CANCEL_INITIALIZED) {
-                               return true;
+       _PTHREAD_LOCK(_pthread_list_lock);
+       TAILQ_FOREACH(p, &__pthread_head, tl_plist) {
+               if (p != thread) continue;
+               int state = os_atomic_load(&p->cancel_state, relaxed);
+               if (os_likely(state & _PTHREAD_CANCEL_INITIALIZED)) {
+                       if (os_unlikely(p->sig != _PTHREAD_SIG)) {
+                               PTHREAD_CLIENT_CRASH(0, "pthread_t was corrupted");
                        }
-                       _PTHREAD_UNLOCK(_pthread_list_lock);
-                       thread_switch(_pthread_kernel_thread(p),
-                                       SWITCH_OPTION_OSLOCK_DEPRESS, 1);
-                       _PTHREAD_LOCK(_pthread_list_lock);
-                       goto loop;
+                       return true;
                }
+               _PTHREAD_UNLOCK(_pthread_list_lock);
+               thread_switch(_pthread_kernel_thread(p),
+                                         SWITCH_OPTION_OSLOCK_DEPRESS, 1);
+               goto loop;
        }
+       _PTHREAD_UNLOCK(_pthread_list_lock);
 
        return false;
 }
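
The convention above is easiest to see from the caller's side. A minimal sketch (the helper name is hypothetical, not part of this commit) of fetching a thread's kernel port under the new convention, mirroring what _pthread_is_valid does below:

	// Hypothetical caller: on success the list lock is held and the caller
	// must drop it; on failure the lock is NOT held.
	static int
	example_get_kport(pthread_t thread, mach_port_t *portp)
	{
		if (!_pthread_validate_thread_and_list_lock(thread)) {
			return ESRCH;
		}
		*portp = _pthread_kernel_thread(thread);
		_PTHREAD_UNLOCK(_pthread_list_lock);
		return 0;
	}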
 
-#define PTHREAD_IS_VALID_LOCK_THREAD 0x1
-
 PTHREAD_ALWAYS_INLINE
 static inline bool
-_pthread_is_valid(pthread_t thread, int flags, mach_port_t *portp)
+_pthread_is_valid(pthread_t thread, mach_port_t *portp)
 {
        mach_port_t kport = MACH_PORT_NULL;
        bool valid;
 
-       if (thread == NULL) {
-               return false;
-       }
-
        if (thread == pthread_self()) {
                valid = true;
                kport = _pthread_kernel_thread(thread);
-               if (flags & PTHREAD_IS_VALID_LOCK_THREAD) {
-                       _PTHREAD_LOCK(thread->lock);
-               }
+       } else if (!_pthread_validate_thread_and_list_lock(thread)) {
+               valid = false;
        } else {
-               _PTHREAD_LOCK(_pthread_list_lock);
-               if (_pthread_is_valid_locked(thread)) {
-                       kport = _pthread_kernel_thread(thread);
-                       valid = true;
-                       if (flags & PTHREAD_IS_VALID_LOCK_THREAD) {
-                               _PTHREAD_LOCK(thread->lock);
-                       }
-               } else {
-                       valid = false;
-               }
+               kport = _pthread_kernel_thread(thread);
+               valid = true;
                _PTHREAD_UNLOCK(_pthread_list_lock);
        }
 
diff --git a/src/offsets.h b/src/offsets.h
new file mode 100644 (file)
index 0000000..0e20385
--- /dev/null
+++ b/src/offsets.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef _POSIX_PTHREAD_OFFSETS_H
+#define _POSIX_PTHREAD_OFFSETS_H
+
+#ifndef __ASSEMBLER__
+#define check_backward_offset(field, value) \
+               _Static_assert(offsetof(struct _pthread, tsd) + value == \
+                               offsetof(struct _pthread, field), #value " is correct")
+#define check_forward_offset(field, value) \
+               _Static_assert(offsetof(struct _pthread, field) == value, \
+                               #value " is correct")
+#else
+#define check_backward_offset(field, value)
+#define check_forward_offset(field, value)
+#endif // __ASSEMBLER__
+
+#if defined(__i386__)
+#define _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET   140
+#define _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET 144
+#elif __LP64__
+#define _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET   -48
+#define _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET -40
+#else
+#define _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET   -36
+#define _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET -32
+#endif
+
+#if defined(__i386__)
+check_forward_offset(stackaddr, _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET);
+check_forward_offset(stackbottom, _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET);
+#else
+check_backward_offset(stackaddr, _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET);
+check_backward_offset(stackbottom, _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET);
+#endif
+
+#endif /* _POSIX_PTHREAD_OFFSETS_H */
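
On the non-i386 slices the offsets above are negative and relative to the tsd field, the base that assembly reaches through the thread register. A C sketch of the access pattern these _Static_asserts keep honest (illustrative only; the real consumers are assembly code):

	// Mirrors a TSD-relative load on the non-i386 variants, where the
	// offset is negative relative to the tsd field.
	static inline void *
	example_read_stackbottom(struct _pthread *t)
	{
		char *tsd_base = (char *)&t->tsd[0];
		return *(void **)(tsd_base + _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET);
	}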
index 8e63bd3a06e5321b0fd46f22cb5df93b62f7e3cb..c9c1b9bc4cceeb02dda250511b51086b09e31acf 100644 (file)
@@ -56,6 +56,8 @@
 #include "introspection_private.h"
 #include "qos_private.h"
 #include "tsd_private.h"
+#include "pthread/stack_np.h"
+#include "offsets.h" // included to validate the offsets at build time
 
 #include <stdlib.h>
 #include <errno.h>
 #include <unistd.h>
 #include <mach/mach_init.h>
 #include <mach/mach_vm.h>
+#include <mach/mach_sync_ipc.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/sysctl.h>
 #include <sys/queue.h>
+#include <sys/ulock.h>
 #include <sys/mman.h>
 #include <machine/vmparam.h>
 #define        __APPLE_API_PRIVATE
 #include <platform/compat.h>
 
 extern int __sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
-                    void *newp, size_t newlen);
+               void *newp, size_t newlen);
 extern void __exit(int) __attribute__((noreturn));
 extern int __pthread_kill(mach_port_t, int);
 
-extern struct _pthread _thread;
-extern int default_priority;
+extern void _pthread_joiner_wake(pthread_t thread);
 
+#if !VARIANT_DYLD
+PTHREAD_NOEXPORT extern struct _pthread *_main_thread_ptr;
+#define main_thread() (_main_thread_ptr)
+#endif // VARIANT_DYLD
 
-//
-// Global variables
-//
+// Default stack size is 512KB; independent of the main thread's stack size.
+#define DEFAULT_STACK_SIZE (size_t)(512 * 1024)
 
-static void (*exitf)(int) = __exit;
-PTHREAD_NOEXPORT void* (*_pthread_malloc)(size_t) = NULL;
-PTHREAD_NOEXPORT void (*_pthread_free)(void *) = NULL;
 
-#if PTHREAD_DEBUG_LOG
-#include <fcntl.h>
-int _pthread_debuglog;
-uint64_t _pthread_debugstart;
-#endif
-
-// This global should be used (carefully) by anyone needing to know if a
-// pthread (other than the main thread) has been created.
-int __is_threaded = 0;
+//
+// Global constants
+//
 
-int __unix_conforming = 0;
+/*
+ * The pthread may be offset into a page.  In that event, by contract
+ * with the kernel, the allocation will extend PTHREAD_SIZE from the
+ * start of the next page.  There's also one page worth of allocation
+ * below stacksize for the guard page. <rdar://problem/19941744>
+ */
+#define PTHREAD_SIZE ((size_t)mach_vm_round_page(sizeof(struct _pthread)))
+#define PTHREAD_ALLOCADDR(stackaddr, stacksize) ((stackaddr - stacksize) - vm_page_size)
+#define PTHREAD_ALLOCSIZE(stackaddr, stacksize) ((round_page((uintptr_t)stackaddr) + PTHREAD_SIZE) - (uintptr_t)PTHREAD_ALLOCADDR(stackaddr, stacksize))
 
-// _pthread_list_lock protects _pthread_count, access to the __pthread_head
-// list, and the parentcheck, childrun and childexit flags of the pthread
-// structure. Externally imported by pthread_cancelable.c.
-PTHREAD_NOEXPORT _pthread_lock _pthread_list_lock = _PTHREAD_LOCK_INITIALIZER;
-PTHREAD_NOEXPORT struct __pthread_list __pthread_head = TAILQ_HEAD_INITIALIZER(__pthread_head);
-static int _pthread_count = 1;
+static const pthread_attr_t _pthread_attr_default = {
+       .sig       = _PTHREAD_ATTR_SIG,
+       .stacksize = 0,
+       .detached  = PTHREAD_CREATE_JOINABLE,
+       .inherit   = _PTHREAD_DEFAULT_INHERITSCHED,
+       .policy    = _PTHREAD_DEFAULT_POLICY,
+       .defaultguardpage = true,
+       // compile time constant for _pthread_default_priority(0)
+       .qosclass  = (1U << (THREAD_QOS_LEGACY - 1 + _PTHREAD_PRIORITY_QOS_CLASS_SHIFT)) |
+                       ((uint8_t)-1 & _PTHREAD_PRIORITY_PRIORITY_MASK),
+};
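
Working out that qosclass initializer, under the assumptions that THREAD_QOS_LEGACY == 4, _PTHREAD_PRIORITY_QOS_CLASS_SHIFT == 8, _PTHREAD_PRIORITY_PRIORITY_MASK == 0x000000ff, and that a relative priority of 0 is encoded as (uint8_t)(0 - 1):

	// class bits:  1U << (4 - 1 + 8)            == 0x00000800
	// relpri 0:    (uint8_t)-1 & 0x000000ff     == 0x000000ff
	// qosclass:    0x00000800 | 0x000000ff      == 0x000008ff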
 
 #if PTHREAD_LAYOUT_SPI
 
@@ -124,315 +134,154 @@ const struct pthread_layout_offsets_s pthread_layout_offsets = {
 #endif // PTHREAD_LAYOUT_SPI
 
 //
-// Static variables
+// Global exported variables
 //
 
-// Mach message notification that a thread needs to be recycled.
-typedef struct _pthread_reap_msg_t {
-       mach_msg_header_t header;
-       pthread_t thread;
-       mach_msg_trailer_t trailer;
-} pthread_reap_msg_t;
+// This global should be used (carefully) by anyone needing to know if a
+// pthread (other than the main thread) has been created.
+int __is_threaded = 0;
+int __unix_conforming = 0;
 
-/*
- * The pthread may be offset into a page.  In that event, by contract
- * with the kernel, the allocation will extend PTHREAD_SIZE from the
- * start of the next page.  There's also one page worth of allocation
- * below stacksize for the guard page. <rdar://problem/19941744>
- */
-#define PTHREAD_SIZE ((size_t)mach_vm_round_page(sizeof(struct _pthread)))
-#define PTHREAD_ALLOCADDR(stackaddr, stacksize) ((stackaddr - stacksize) - vm_page_size)
-#define PTHREAD_ALLOCSIZE(stackaddr, stacksize) ((round_page((uintptr_t)stackaddr) + PTHREAD_SIZE) - (uintptr_t)PTHREAD_ALLOCADDR(stackaddr, stacksize))
+//
+// Global internal variables
+//
 
-static pthread_attr_t _pthread_attr_default = { };
+// _pthread_list_lock protects _pthread_count, access to the __pthread_head
+// list. Externally imported by pthread_cancelable.c.
+struct __pthread_list __pthread_head = TAILQ_HEAD_INITIALIZER(__pthread_head);
+_pthread_lock _pthread_list_lock = _PTHREAD_LOCK_INITIALIZER;
+
+uint32_t _main_qos;
 
+#if VARIANT_DYLD
 // The main thread's pthread_t
-PTHREAD_NOEXPORT struct _pthread _thread __attribute__((aligned(64))) = { };
+struct _pthread _main_thread __attribute__((aligned(64))) = { };
+#define main_thread() (&_main_thread)
+#else // VARIANT_DYLD
+struct _pthread *_main_thread_ptr;
+#endif // VARIANT_DYLD
 
-PTHREAD_NOEXPORT int default_priority;
-static int max_priority;
-static int min_priority;
+#if PTHREAD_DEBUG_LOG
+#include <fcntl.h>
+int _pthread_debuglog;
+uint64_t _pthread_debugstart;
+#endif
+
+//
+// Global static variables
+//
+static bool __workq_newapi;
+static uint8_t default_priority;
+#if !VARIANT_DYLD
+static uint8_t max_priority;
+static uint8_t min_priority;
+#endif // !VARIANT_DYLD
+static int _pthread_count = 1;
 static int pthread_concurrency;
+static uintptr_t _pthread_ptr_munge_token;
+
+static void (*exitf)(int) = __exit;
+#if !VARIANT_DYLD
+static void *(*_pthread_malloc)(size_t) = NULL;
+static void (*_pthread_free)(void *) = NULL;
+#endif // !VARIANT_DYLD
 
 // work queue support data
-static void (*__libdispatch_workerfunction)(pthread_priority_t) = NULL;
-static void (*__libdispatch_keventfunction)(void **events, int *nevents) = NULL;
-static void (*__libdispatch_workloopfunction)(uint64_t *workloop_id, void **events, int *nevents) = NULL;
+PTHREAD_NORETURN
+static void
+__pthread_invalid_keventfunction(void **events, int *nevents)
+{
+       PTHREAD_CLIENT_CRASH(0, "Invalid kqworkq setup");
+}
+
+PTHREAD_NORETURN
+static void
+__pthread_invalid_workloopfunction(uint64_t *workloop_id, void **events, int *nevents)
+{
+       PTHREAD_CLIENT_CRASH(0, "Invalid kqwl setup");
+}
+static pthread_workqueue_function2_t __libdispatch_workerfunction;
+static pthread_workqueue_function_kevent_t __libdispatch_keventfunction = &__pthread_invalid_keventfunction;
+static pthread_workqueue_function_workloop_t __libdispatch_workloopfunction = &__pthread_invalid_workloopfunction;
 static int __libdispatch_offset;
+static int __pthread_supported_features; // supported feature set
 
-// supported feature set
-int __pthread_supported_features;
-static bool __workq_newapi;
+#if defined(__i386__) || defined(__x86_64__)
+static mach_vm_address_t __pthread_stack_hint = 0xB0000000;
+#else
+#error no __pthread_stack_hint for this architecture
+#endif
 
 //
 // Function prototypes
 //
 
 // pthread primitives
-static int _pthread_allocate(pthread_t *thread, const pthread_attr_t *attrs, void **stack);
-static int _pthread_deallocate(pthread_t t);
-
-static void _pthread_terminate_invoke(pthread_t t);
-
-static inline void _pthread_struct_init(pthread_t t,
-       const pthread_attr_t *attrs,
-       void *stack,
-       size_t stacksize,
-       void *freeaddr,
-       size_t freesize);
+static inline void _pthread_struct_init(pthread_t t, const pthread_attr_t *attrs,
+               void *stack, size_t stacksize, void *freeaddr, size_t freesize);
 
+#if VARIANT_DYLD
+static void _pthread_set_self_dyld(void);
+#endif // VARIANT_DYLD
 static inline void _pthread_set_self_internal(pthread_t, bool needs_tsd_base_set);
 
 static void _pthread_dealloc_reply_port(pthread_t t);
 static void _pthread_dealloc_special_reply_port(pthread_t t);
 
-static inline void __pthread_add_thread(pthread_t t, const pthread_attr_t *attr, bool parent, bool from_mach_thread);
-static inline int __pthread_remove_thread(pthread_t t, bool child, bool *should_exit);
+static inline void __pthread_started_thread(pthread_t t);
 
 static void _pthread_exit(pthread_t self, void *value_ptr) __dead2;
 
-static inline void _pthread_introspection_thread_create(pthread_t t, bool destroy);
+static inline void _pthread_introspection_thread_create(pthread_t t);
 static inline void _pthread_introspection_thread_start(pthread_t t);
-static inline void _pthread_introspection_thread_terminate(pthread_t t, void *freeaddr, size_t freesize, bool destroy);
+static inline void _pthread_introspection_thread_terminate(pthread_t t);
 static inline void _pthread_introspection_thread_destroy(pthread_t t);
 
 extern void _pthread_set_self(pthread_t);
 extern void start_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unused, int reuse); // trampoline into _pthread_wqthread
 extern void thread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags); // trampoline into _pthread_start
 
-/* Compatibility: previous pthread API used WORKQUEUE_OVERCOMMIT to request overcommit threads from
- * the kernel. This definition is kept here, in userspace only, to perform the compatibility shimm
- * from old API requests to the new kext conventions.
- */
-#define WORKQUEUE_OVERCOMMIT 0x10000
-
 /*
  * Flags field passed to bsdthread_create and back in pthread_start
-31  <---------------------------------> 0
-_________________________________________
-| flags(8) | policy(8) | importance(16) |
------------------------------------------
-*/
-
-#define PTHREAD_START_CUSTOM           0x01000000
+ * 31  <---------------------------------> 0
+ * _________________________________________
+ * | flags(8) | policy(8) | importance(16) |
+ * -----------------------------------------
+ */
+#define PTHREAD_START_CUSTOM           0x01000000 // <rdar://problem/34501401>
 #define PTHREAD_START_SETSCHED         0x02000000
-#define PTHREAD_START_DETACHED         0x04000000
+// was PTHREAD_START_DETACHED          0x04000000
 #define PTHREAD_START_QOSCLASS         0x08000000
 #define PTHREAD_START_TSD_BASE_SET     0x10000000
+#define PTHREAD_START_SUSPENDED                0x20000000
 #define PTHREAD_START_QOSCLASS_MASK 0x00ffffff
 #define PTHREAD_START_POLICY_BITSHIFT 16
 #define PTHREAD_START_POLICY_MASK 0xff
 #define PTHREAD_START_IMPORTANCE_MASK 0xffff
 
-static int pthread_setschedparam_internal(pthread_t, mach_port_t, int, const struct sched_param *);
+#if (!defined(__OPEN_SOURCE__) && TARGET_OS_OSX) || OS_VARIANT_RESOLVED // 40703288
+static int pthread_setschedparam_internal(pthread_t, mach_port_t, int,
+               const struct sched_param *);
+#endif
+
 extern pthread_t __bsdthread_create(void *(*func)(void *), void * func_arg, void * stack, pthread_t  thread, unsigned int flags);
 extern int __bsdthread_register(void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), void (*)(pthread_t, mach_port_t, void *, void *, int), int,void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), int32_t *,__uint64_t);
 extern int __bsdthread_terminate(void * freeaddr, size_t freesize, mach_port_t kport, mach_port_t joinsem);
 extern __uint64_t __thread_selfid( void );
 
-extern int __workq_open(void);
-extern int __workq_kernreturn(int, void *, int, int);
-
-#if defined(__i386__) || defined(__x86_64__)
-static const mach_vm_address_t PTHREAD_STACK_HINT = 0xB0000000;
+#if __LP64__
+_Static_assert(offsetof(struct _pthread, tsd) == 224, "TSD LP64 offset");
 #else
-#error no PTHREAD_STACK_HINT for this architecture
+_Static_assert(offsetof(struct _pthread, tsd) == 176, "TSD ILP32 offset");
 #endif
-
-// Check that offsets of _PTHREAD_STRUCT_DIRECT_*_OFFSET values hasn't changed
 _Static_assert(offsetof(struct _pthread, tsd) + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET
                == offsetof(struct _pthread, thread_id),
                "_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET is correct");
 
-// Allocate a thread structure, stack and guard page.
-//
-// The thread structure may optionally be placed in the same allocation as the
-// stack, residing above the top of the stack. This cannot be done if a
-// custom stack address is provided.
-//
-// Similarly the guard page cannot be allocated if a custom stack address is
-// provided.
-//
-// The allocated thread structure is initialized with values that indicate how
-// it should be freed.
-
-static int
-_pthread_allocate(pthread_t *thread, const pthread_attr_t *attrs, void **stack)
-{
-       int res;
-       kern_return_t kr;
-       pthread_t t = NULL;
-       mach_vm_address_t allocaddr = PTHREAD_STACK_HINT;
-       size_t allocsize = 0;
-       size_t guardsize = 0;
-       size_t stacksize = 0;
-
-       PTHREAD_ASSERT(attrs->stacksize >= PTHREAD_STACK_MIN);
-
-       *thread = NULL;
-       *stack = NULL;
-
-       // Allocate a pthread structure if necessary
-
-       if (attrs->stackaddr != NULL) {
-               PTHREAD_ASSERT(((uintptr_t)attrs->stackaddr % vm_page_size) == 0);
-               *stack = attrs->stackaddr;
-               allocsize = PTHREAD_SIZE;
-       } else {
-               guardsize = attrs->guardsize;
-               stacksize = attrs->stacksize;
-               allocsize = stacksize + guardsize + PTHREAD_SIZE;
-       }
-
-       kr = mach_vm_map(mach_task_self(),
-                        &allocaddr,
-                        allocsize,
-                        vm_page_size - 1,
-                        VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE,
-                        MEMORY_OBJECT_NULL,
-                        0,
-                        FALSE,
-                        VM_PROT_DEFAULT,
-                        VM_PROT_ALL,
-                        VM_INHERIT_DEFAULT);
-
-       if (kr != KERN_SUCCESS) {
-               kr = mach_vm_allocate(mach_task_self(),
-                                &allocaddr,
-                                allocsize,
-                                VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
-       }
-
-       if (kr == KERN_SUCCESS) {
-               // The stack grows down.
-               // Set the guard page at the lowest address of the
-               // newly allocated stack. Return the highest address
-               // of the stack.
-               if (guardsize) {
-                       (void)mach_vm_protect(mach_task_self(), allocaddr, guardsize, FALSE, VM_PROT_NONE);
-               }
-
-               // Thread structure resides at the top of the stack.
-               t = (void *)(allocaddr + stacksize + guardsize);
-               if (stacksize) {
-                       // Returns the top of the stack.
-                       *stack = t;
-               }
-       }
-
-       if (t != NULL) {
-               _pthread_struct_init(t, attrs,
-                                    *stack, attrs->stacksize,
-                                    allocaddr, allocsize);
-               *thread = t;
-               res = 0;
-       } else {
-               res = EAGAIN;
-       }
-        return res;
-}
-
-static int
-_pthread_deallocate(pthread_t t)
-{
-       // Don't free the main thread.
-       if (t != &_thread) {
-               kern_return_t ret;
-               ret = mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize);
-               PTHREAD_ASSERT(ret == KERN_SUCCESS);
-       }
-       return 0;
-}
-
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wreturn-stack-address"
-
-PTHREAD_NOINLINE
-static void*
-_pthread_current_stack_address(void)
-{
-       int a;
-       return &a;
-}
-
-#pragma clang diagnostic pop
-
-// Terminates the thread if called from the currently running thread.
-PTHREAD_NORETURN PTHREAD_NOINLINE PTHREAD_NOT_TAIL_CALLED
-static void
-_pthread_terminate(pthread_t t)
-{
-       PTHREAD_ASSERT(t == pthread_self());
-
-       uintptr_t freeaddr = (uintptr_t)t->freeaddr;
-       size_t freesize = t->freesize;
-
-       // the size of just the stack
-       size_t freesize_stack = t->freesize;
-
-       // We usually pass our structure+stack to bsdthread_terminate to free, but
-       // if we get told to keep the pthread_t structure around then we need to
-       // adjust the free size and addr in the pthread_t to just refer to the
-       // structure and not the stack.  If we do end up deallocating the
-       // structure, this is useless work since no one can read the result, but we
-       // can't do it after the call to pthread_remove_thread because it isn't
-       // safe to dereference t after that.
-       if ((void*)t > t->freeaddr && (void*)t < t->freeaddr + t->freesize){
-               // Check to ensure the pthread structure itself is part of the
-               // allocation described by freeaddr/freesize, in which case we split and
-               // only deallocate the area below the pthread structure.  In the event of a
-               // custom stack, the freeaddr/size will be the pthread structure itself, in
-               // which case we shouldn't free anything (the final else case).
-               freesize_stack = trunc_page((uintptr_t)t - (uintptr_t)freeaddr);
-
-               // describe just the remainder for deallocation when the pthread_t goes away
-               t->freeaddr += freesize_stack;
-               t->freesize -= freesize_stack;
-       } else if (t == &_thread){
-               freeaddr = t->stackaddr - pthread_get_stacksize_np(t);
-               uintptr_t stackborder = trunc_page((uintptr_t)_pthread_current_stack_address());
-               freesize_stack = stackborder - freeaddr;
-       } else {
-               freesize_stack = 0;
-       }
-
-       mach_port_t kport = _pthread_kernel_thread(t);
-       semaphore_t joinsem = t->joiner_notify;
-
-       _pthread_dealloc_special_reply_port(t);
-       _pthread_dealloc_reply_port(t);
+#pragma mark pthread attrs
 
-       // After the call to __pthread_remove_thread, it is not safe to
-       // dereference the pthread_t structure.
-
-       bool destroy, should_exit;
-       destroy = (__pthread_remove_thread(t, true, &should_exit) != EBUSY);
-
-       if (!destroy || t == &_thread) {
-               // Use the adjusted freesize of just the stack that we computed above.
-               freesize = freesize_stack;
-       }
-
-       // Check if there is nothing to free because the thread has a custom
-       // stack allocation and is joinable.
-       if (freesize == 0) {
-               freeaddr = 0;
-       }
-       _pthread_introspection_thread_terminate(t, freeaddr, freesize, destroy);
-       if (should_exit) {
-               exitf(0);
-       }
-
-       __bsdthread_terminate((void *)freeaddr, freesize, kport, joinsem);
-       PTHREAD_ABORT("thread %p didn't terminate", t);
-}
-
-PTHREAD_NORETURN
-static void
-_pthread_terminate_invoke(pthread_t t)
-{
-       _pthread_terminate(t);
-}
+_Static_assert(sizeof(struct _pthread_attr_t) == sizeof(__darwin_pthread_attr_t),
+               "internal pthread_attr_t == external pthread_attr_t");
 
 int
 pthread_attr_destroy(pthread_attr_t *attr)
@@ -467,12 +316,24 @@ pthread_attr_getinheritsched(const pthread_attr_t *attr, int *inheritsched)
        return ret;
 }
 
+static PTHREAD_ALWAYS_INLINE void
+_pthread_attr_get_schedparam(const pthread_attr_t *attr,
+               struct sched_param *param)
+{
+       if (attr->schedset) {
+               *param = attr->param;
+       } else {
+               param->sched_priority = default_priority;
+               param->quantum = 10; /* quantum isn't public yet */
+       }
+}
+
 int
 pthread_attr_getschedparam(const pthread_attr_t *attr, struct sched_param *param)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG) {
-               *param = attr->param;
+               _pthread_attr_get_schedparam(attr, param);
                ret = 0;
        }
        return ret;
@@ -489,24 +350,10 @@ pthread_attr_getschedpolicy(const pthread_attr_t *attr, int *policy)
        return ret;
 }
 
-// Default stack size is 512KB; independent of the main thread's stack size.
-static const size_t DEFAULT_STACK_SIZE = 512 * 1024;
-
 int
 pthread_attr_init(pthread_attr_t *attr)
 {
-       attr->stacksize = DEFAULT_STACK_SIZE;
-       attr->stackaddr = NULL;
-       attr->sig = _PTHREAD_ATTR_SIG;
-       attr->param.sched_priority = default_priority;
-       attr->param.quantum = 10; /* quantum isn't public yet */
-       attr->detached = PTHREAD_CREATE_JOINABLE;
-       attr->inherit = _PTHREAD_DEFAULT_INHERITSCHED;
-       attr->policy = _PTHREAD_DEFAULT_POLICY;
-       attr->fastpath = 1;
-       attr->schedset = 0;
-       attr->guardsize = vm_page_size;
-       attr->qosclass = _pthread_priority_make_newest(QOS_CLASS_DEFAULT, 0, 0);
+       *attr = _pthread_attr_default;
        return 0;
 }
 
@@ -515,8 +362,8 @@ pthread_attr_setdetachstate(pthread_attr_t *attr, int detachstate)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG &&
-           (detachstate == PTHREAD_CREATE_JOINABLE ||
-            detachstate == PTHREAD_CREATE_DETACHED)) {
+                       (detachstate == PTHREAD_CREATE_JOINABLE ||
+                       detachstate == PTHREAD_CREATE_DETACHED)) {
                attr->detached = detachstate;
                ret = 0;
        }
@@ -528,8 +375,8 @@ pthread_attr_setinheritsched(pthread_attr_t *attr, int inheritsched)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG &&
-           (inheritsched == PTHREAD_INHERIT_SCHED ||
-            inheritsched == PTHREAD_EXPLICIT_SCHED)) {
+                       (inheritsched == PTHREAD_INHERIT_SCHED ||
+                       inheritsched == PTHREAD_EXPLICIT_SCHED)) {
                attr->inherit = inheritsched;
                ret = 0;
        }
@@ -553,12 +400,14 @@ int
 pthread_attr_setschedpolicy(pthread_attr_t *attr, int policy)
 {
        int ret = EINVAL;
-       if (attr->sig == _PTHREAD_ATTR_SIG &&
-           (policy == SCHED_OTHER ||
-            policy == SCHED_RR ||
-            policy == SCHED_FIFO)) {
+       if (attr->sig == _PTHREAD_ATTR_SIG && (policy == SCHED_OTHER ||
+                       policy == SCHED_RR || policy == SCHED_FIFO)) {
+               if (!_PTHREAD_POLICY_IS_FIXEDPRI(policy)) {
+                       /* non-fixedpri policy should remove cpupercent */
+                       attr->cpupercentset = 0;
+               }
                attr->policy = policy;
-               attr->schedset = 1;
+               attr->policyset = 1;
                ret = 0;
        }
        return ret;
@@ -606,21 +455,27 @@ pthread_attr_setstackaddr(pthread_attr_t *attr, void *stackaddr)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG &&
-           ((uintptr_t)stackaddr % vm_page_size) == 0) {
+                       ((uintptr_t)stackaddr % vm_page_size) == 0) {
                attr->stackaddr = stackaddr;
-               attr->fastpath = 0;
+               attr->defaultguardpage = false;
                attr->guardsize = 0;
                ret = 0;
        }
        return ret;
 }
 
+static inline size_t
+_pthread_attr_stacksize(const pthread_attr_t *attr)
+{
+       return attr->stacksize ? attr->stacksize : DEFAULT_STACK_SIZE;
+}
+
 int
 pthread_attr_getstacksize(const pthread_attr_t *attr, size_t *stacksize)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG) {
-               *stacksize = attr->stacksize;
+               *stacksize = _pthread_attr_stacksize(attr);
                ret = 0;
        }
        return ret;
@@ -631,70 +486,356 @@ pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG &&
-           (stacksize % vm_page_size) == 0 &&
-           stacksize >= PTHREAD_STACK_MIN) {
+                       (stacksize % vm_page_size) == 0 &&
+                       stacksize >= PTHREAD_STACK_MIN) {
+               attr->stacksize = stacksize;
+               ret = 0;
+       }
+       return ret;
+}
+
+int
+pthread_attr_getstack(const pthread_attr_t *attr, void **stackaddr, size_t * stacksize)
+{
+       int ret = EINVAL;
+       if (attr->sig == _PTHREAD_ATTR_SIG) {
+               *stackaddr = (void *)((uintptr_t)attr->stackaddr - attr->stacksize);
+               *stacksize = _pthread_attr_stacksize(attr);
+               ret = 0;
+       }
+       return ret;
+}
+
+// Per SUSv3, the stackaddr is the base address, the lowest addressable byte
+// address. This is not the same as in pthread_attr_setstackaddr.
+int
+pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize)
+{
+       int ret = EINVAL;
+       if (attr->sig == _PTHREAD_ATTR_SIG &&
+                       ((uintptr_t)stackaddr % vm_page_size) == 0 &&
+                       (stacksize % vm_page_size) == 0 &&
+                       stacksize >= PTHREAD_STACK_MIN) {
+               attr->stackaddr = (void *)((uintptr_t)stackaddr + stacksize);
                attr->stacksize = stacksize;
                ret = 0;
        }
-       return ret;
+       return ret;
+}
+
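
A usage sketch for the SUSv3 semantics above (hypothetical snippet, not from this commit): the caller hands in the lowest address, and the attribute stores the top of the stack.

	pthread_attr_t attr;
	size_t stacksize = 16 * vm_page_size;          // page-aligned, >= PTHREAD_STACK_MIN
	void *base = mmap(NULL, stacksize, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANON, -1, 0);        // lowest addressable byte; error checking omitted
	pthread_attr_init(&attr);
	pthread_attr_setstack(&attr, base, stacksize); // stores base + stacksize internally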
+int
+pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize)
+{
+       int ret = EINVAL;
+       if (attr->sig == _PTHREAD_ATTR_SIG && (guardsize % vm_page_size) == 0) {
+               /* Guardsize of 0 is valid, means no guard */
+               attr->defaultguardpage = false;
+               attr->guardsize = guardsize;
+               ret = 0;
+       }
+       return ret;
+}
+
+static inline size_t
+_pthread_attr_guardsize(const pthread_attr_t *attr)
+{
+       return attr->defaultguardpage ? vm_page_size : attr->guardsize;
+}
+
+int
+pthread_attr_getguardsize(const pthread_attr_t *attr, size_t *guardsize)
+{
+       int ret = EINVAL;
+       if (attr->sig == _PTHREAD_ATTR_SIG) {
+               *guardsize = _pthread_attr_guardsize(attr);
+               ret = 0;
+       }
+       return ret;
+}
+
+int
+pthread_attr_setcpupercent_np(pthread_attr_t *attr, int percent,
+               unsigned long refillms)
+{
+       int ret = EINVAL;
+       if (attr->sig == _PTHREAD_ATTR_SIG && percent < UINT8_MAX &&
+                       refillms < _PTHREAD_ATTR_REFILLMS_MAX && attr->policyset &&
+                       _PTHREAD_POLICY_IS_FIXEDPRI(attr->policy)) {
+               attr->cpupercent = percent;
+               attr->refillms = (uint32_t)(refillms & 0x00ffffff);
+               attr->cpupercentset = 1;
+               ret = 0;
+       }
+       return ret;
+}
+
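
A usage sketch for the new attribute (hypothetical values): cpupercent only sticks once a fixed-priority policy has been set, matching the policyset and _PTHREAD_POLICY_IS_FIXEDPRI checks above.

	pthread_attr_t attr;
	pthread_attr_init(&attr);
	pthread_attr_setschedpolicy(&attr, SCHED_RR);  // fixed-priority policy first
	pthread_attr_setcpupercent_np(&attr, 50, 10);  // 50% of a CPU over a 10ms refill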
+#pragma mark pthread lifetime
+
+// Allocate a thread structure, stack and guard page.
+//
+// The thread structure may optionally be placed in the same allocation as the
+// stack, residing above the top of the stack. This cannot be done if a
+// custom stack address is provided.
+//
+// Similarly the guard page cannot be allocated if a custom stack address is
+// provided.
+//
+// The allocated thread structure is initialized with values that indicate how
+// it should be freed.
+
+static pthread_t
+_pthread_allocate(const pthread_attr_t *attrs, void **stack)
+{
+       mach_vm_address_t allocaddr = __pthread_stack_hint;
+       size_t allocsize, guardsize, stacksize;
+       kern_return_t kr;
+       pthread_t t;
+
+       PTHREAD_ASSERT(attrs->stacksize == 0 ||
+                       attrs->stacksize >= PTHREAD_STACK_MIN);
+
+       // Allocate a pthread structure if necessary
+
+       if (attrs->stackaddr != NULL) {
+               PTHREAD_ASSERT(((uintptr_t)attrs->stackaddr % vm_page_size) == 0);
+               allocsize = PTHREAD_SIZE;
+               guardsize = 0;
+               // <rdar://problem/42588315> if the attrs struct specifies a custom
+               // stack address but not a custom size, using ->stacksize here instead
+               // of _pthread_attr_stacksize stores stacksize as zero, indicating
+               // that the stack size is unknown.
+               stacksize = attrs->stacksize;
+       } else {
+               guardsize = _pthread_attr_guardsize(attrs);
+               stacksize = _pthread_attr_stacksize(attrs) + PTHREAD_T_OFFSET;
+               allocsize = stacksize + guardsize + PTHREAD_SIZE;
+               allocsize = mach_vm_round_page(allocsize);
+       }
+
+       kr = mach_vm_map(mach_task_self(), &allocaddr, allocsize, vm_page_size - 1,
+                        VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, MEMORY_OBJECT_NULL,
+                        0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+
+       if (kr != KERN_SUCCESS) {
+               kr = mach_vm_allocate(mach_task_self(), &allocaddr, allocsize,
+                                VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
+       }
+       if (kr != KERN_SUCCESS) {
+               *stack  = NULL;
+               return NULL;
+       }
+
+       // The stack grows down.
+       // Set the guard page at the lowest address of the
+       // newly allocated stack. Return the highest address
+       // of the stack.
+       if (guardsize) {
+               (void)mach_vm_protect(mach_task_self(), allocaddr, guardsize,
+                               FALSE, VM_PROT_NONE);
+       }
+
+       // Thread structure resides at the top of the stack (when using a
+       // custom stack, allocsize == PTHREAD_SIZE, which places the pthread_t
+       // at allocaddr).
+       t = (pthread_t)(allocaddr + allocsize - PTHREAD_SIZE);
+       if (attrs->stackaddr) {
+               *stack = attrs->stackaddr;
+       } else {
+               *stack = t;
+       }
+
+       _pthread_struct_init(t, attrs, *stack, stacksize, allocaddr, allocsize);
+       return t;
+}
+
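
The resulting layout in the default (non-custom-stack) case, sketched here under the assumption of a single guard page:

	//  allocaddr                                       allocaddr + allocsize
	//  | guard page(s)  |      stack (grows down)     | struct _pthread |
	//    VM_PROT_NONE                   *stack == t --^
	//
	// With a custom stack, allocsize == PTHREAD_SIZE: only the structure is
	// allocated here and *stack points at attrs->stackaddr instead.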
+PTHREAD_NOINLINE
+void
+_pthread_deallocate(pthread_t t, bool from_mach_thread)
+{
+       kern_return_t ret;
+
+       // Don't free the main thread.
+       if (t != main_thread()) {
+               if (!from_mach_thread) { // see __pthread_add_thread
+                       _pthread_introspection_thread_destroy(t);
+               }
+               ret = mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize);
+               PTHREAD_ASSERT(ret == KERN_SUCCESS);
+       }
 }
 
-int
-pthread_attr_getstack(const pthread_attr_t *attr, void **stackaddr, size_t * stacksize)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreturn-stack-address"
+
+PTHREAD_NOINLINE
+static void*
+_pthread_current_stack_address(void)
 {
-       int ret = EINVAL;
-       if (attr->sig == _PTHREAD_ATTR_SIG) {
-               *stackaddr = (void *)((uintptr_t)attr->stackaddr - attr->stacksize);
-               *stacksize = attr->stacksize;
-               ret = 0;
-       }
-       return ret;
+       int a;
+       return &a;
 }
 
-// Per SUSv3, the stackaddr is the base address, the lowest addressable byte
-// address. This is not the same as in pthread_attr_setstackaddr.
-int
-pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize)
+#pragma clang diagnostic pop
+
+void
+_pthread_joiner_wake(pthread_t thread)
 {
-       int ret = EINVAL;
-       if (attr->sig == _PTHREAD_ATTR_SIG &&
-           ((uintptr_t)stackaddr % vm_page_size) == 0 &&
-           (stacksize % vm_page_size) == 0 &&
-           stacksize >= PTHREAD_STACK_MIN) {
-               attr->stackaddr = (void *)((uintptr_t)stackaddr + stacksize);
-               attr->stacksize = stacksize;
-               attr->fastpath = 0;
-               ret = 0;
+       uint32_t *exit_gate = &thread->tl_exit_gate;
+
+       for (;;) {
+               int ret = __ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, exit_gate, 0);
+               if (ret == 0 || ret == -ENOENT) {
+                       return;
+               }
+               if (ret != -EINTR) {
+                       PTHREAD_INTERNAL_CRASH(-ret, "pthread_join() wake failure");
+               }
        }
-       return ret;
 }
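
For reference, a generic sketch of the __ulock_wait side that such a wake pairs with, shown with UL_COMPARE_AND_WAIT for simplicity (the wait and wake operations must match, and the join path itself uses UL_UNFAIR_LOCK; ULF_NO_ERRNO makes the syscall return a negated error code rather than setting errno). Everything here is illustrative, not this commit's join path:

	static uint32_t example_gate;

	static void
	example_gate_wait(uint32_t busy_value)
	{
		// Block while the gate still holds busy_value; loop on spurious
		// wakeups and EINTR, as the wake loop above does for EINTR.
		while (example_gate == busy_value) {
			int ret = __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO,
					&example_gate, busy_value, 0 /* no timeout */);
			if (ret < 0 && ret != -EINTR) {
				PTHREAD_INTERNAL_CRASH(-ret, "gate wait failure");
			}
		}
	}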
 
-int
-pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize)
+// Terminates the thread if called from the currently running thread.
+PTHREAD_NORETURN PTHREAD_NOINLINE PTHREAD_NOT_TAIL_CALLED
+static void
+_pthread_terminate(pthread_t t, void *exit_value)
 {
-       int ret = EINVAL;
-       if (attr->sig == _PTHREAD_ATTR_SIG) {
-               /* Guardsize of 0 is valid, ot means no guard */
-               if ((guardsize % vm_page_size) == 0) {
-                       attr->guardsize = guardsize;
-                       attr->fastpath = 0;
-                       ret = 0;
+       PTHREAD_ASSERT(t == pthread_self());
+
+       _pthread_introspection_thread_terminate(t);
+
+       uintptr_t freeaddr = (uintptr_t)t->freeaddr;
+       size_t freesize = t->freesize;
+       bool should_exit;
+
+       // the size of just the stack
+       size_t freesize_stack = t->freesize;
+
+       // We usually pass our structure+stack to bsdthread_terminate to free, but
+       // if we get told to keep the pthread_t structure around then we need to
+       // adjust the free size and addr in the pthread_t to just refer to the
+       // structure and not the stack.  If we do end up deallocating the
+       // structure, this is useless work since no one can read the result, but we
+       // can't do it after the call to pthread_remove_thread because it isn't
+       // safe to dereference t after that.
+       if ((void*)t > t->freeaddr && (void*)t < t->freeaddr + t->freesize){
+               // Check to ensure the pthread structure itself is part of the
+               // allocation described by freeaddr/freesize, in which case we split and
+               // only deallocate the area below the pthread structure.  In the event of a
+               // custom stack, the freeaddr/size will be the pthread structure itself, in
+               // which case we shouldn't free anything (the final else case).
+               freesize_stack = trunc_page((uintptr_t)t - (uintptr_t)freeaddr);
+
+               // describe just the remainder for deallocation when the pthread_t goes away
+               t->freeaddr += freesize_stack;
+               t->freesize -= freesize_stack;
+       } else if (t == main_thread()) {
+               freeaddr = t->stackaddr - pthread_get_stacksize_np(t);
+               uintptr_t stackborder = trunc_page((uintptr_t)_pthread_current_stack_address());
+               freesize_stack = stackborder - freeaddr;
+       } else {
+               freesize_stack = 0;
+       }
+
+       mach_port_t kport = _pthread_kernel_thread(t);
+       bool keep_thread_struct = false, needs_wake = false;
+       semaphore_t custom_stack_sema = MACH_PORT_NULL;
+
+       _pthread_dealloc_special_reply_port(t);
+       _pthread_dealloc_reply_port(t);
+
+       _PTHREAD_LOCK(_pthread_list_lock);
+
+       // This piece of code interacts with pthread_join. It will always:
+       // - set tl_exit_gate to MACH_PORT_DEAD (thread exited)
+       // - set tl_exit_value to the value passed to pthread_exit()
+       // - decrement _pthread_count, so that we can exit the process when all
+       //   threads exited even if not all of them were joined.
+       t->tl_exit_gate = MACH_PORT_DEAD;
+       t->tl_exit_value = exit_value;
+       should_exit = (--_pthread_count <= 0);
+
+       // If we see a joiner, we prepost that the join has to succeed,
+       // and the joiner is committed to finish (even if it was canceled)
+       if (t->tl_join_ctx) {
+               custom_stack_sema = _pthread_joiner_prepost_wake(t); // unsets tl_joinable
+               needs_wake = true;
+       }
+
+       // Joinable threads that have no joiner yet are kept on the thread list
+       // so that pthread_join() can later discover the thread when it is joined,
+       // and will have to do the pthread_t cleanup.
+       if (t->tl_joinable) {
+               t->tl_joiner_cleans_up = keep_thread_struct = true;
+       } else {
+               TAILQ_REMOVE(&__pthread_head, t, tl_plist);
+       }
+
+       _PTHREAD_UNLOCK(_pthread_list_lock);
+
+       if (needs_wake) {
+               // When we found a waiter, we want to drop the very contended list lock
+               // before we do the syscall in _pthread_joiner_wake(). Then, we decide
+               // who gets to cleanup the pthread_t between the joiner and the exiting
+               // thread:
+               // - the joiner tries to set tl_join_ctx to NULL
+               // - the exiting thread tries to set tl_joiner_cleans_up to true
+               // Whoever does it first commits the other guy to cleanup the pthread_t
+               _pthread_joiner_wake(t);
+               _PTHREAD_LOCK(_pthread_list_lock);
+               if (t->tl_join_ctx) {
+                       t->tl_joiner_cleans_up = true;
+                       keep_thread_struct = true;
                }
+               _PTHREAD_UNLOCK(_pthread_list_lock);
        }
-       return ret;
-}
 
-int
-pthread_attr_getguardsize(const pthread_attr_t *attr, size_t *guardsize)
-{
-       int ret = EINVAL;
-       if (attr->sig == _PTHREAD_ATTR_SIG) {
-               *guardsize = attr->guardsize;
-               ret = 0;
+       //
+       // /!\ dereferencing `t` past this point is not safe /!\
+       //
+
+       if (keep_thread_struct || t == main_thread()) {
+               // Use the adjusted freesize of just the stack that we computed above.
+               freesize = freesize_stack;
+       } else {
+               _pthread_introspection_thread_destroy(t);
        }
-       return ret;
+
+       // Check if there is nothing to free because the thread has a custom
+       // stack allocation and is joinable.
+       if (freesize == 0) {
+               freeaddr = 0;
+       }
+       if (should_exit) {
+               exitf(0);
+       }
+       __bsdthread_terminate((void *)freeaddr, freesize, kport, custom_stack_sema);
+       PTHREAD_INTERNAL_CRASH(t, "thread didn't terminate");
+}
+
+PTHREAD_NORETURN
+static void
+_pthread_terminate_invoke(pthread_t t, void *exit_value)
+{
+#if PTHREAD_T_OFFSET
+       void *p = NULL;
+       // <rdar://problem/25688492> During pthread termination there is a race
+       // between pthread_join and pthread_terminate; if the joiner is responsible
+       // for cleaning up the pthread_t struct, then it may destroy some part of the
+       // stack with it on 16k OSes. So that this doesn't cause _pthread_terminate()
+       // to crash because its stack has been removed from under its feet, just make
+       // sure termination happens in a part of the stack that is not on the same
+       // page as the pthread_t.
+       if (trunc_page((uintptr_t)__builtin_frame_address(0)) ==
+                       trunc_page((uintptr_t)t)) {
+               p = alloca(PTHREAD_T_OFFSET);
+       }
+       // And this __asm__ volatile is needed to stop the compiler from optimising
+       // away the alloca() completely.
+       __asm__ volatile ("" : : "r"(p) );
+#endif
+       _pthread_terminate(t, exit_value);
 }
 
+#pragma mark pthread start / body
 
 /*
  * Create and start execution of a new thread.
@@ -704,51 +845,22 @@ static void
 _pthread_body(pthread_t self, bool needs_tsd_base_set)
 {
        _pthread_set_self_internal(self, needs_tsd_base_set);
-       __pthread_add_thread(self, NULL, false, false);
-       void *result = (self->fun)(self->arg);
-
-       _pthread_exit(self, result);
+       __pthread_started_thread(self);
+       _pthread_exit(self, (self->fun)(self->arg));
 }
 
 PTHREAD_NORETURN
 void
-_pthread_start(pthread_t self,
-              mach_port_t kport,
-              void *(*fun)(void *),
-              void *arg,
-              size_t stacksize,
-              unsigned int pflags)
-{
-       if ((pflags & PTHREAD_START_CUSTOM) == 0) {
-               void *stackaddr = self;
-               _pthread_struct_init(self, &_pthread_attr_default,
-                               stackaddr, stacksize,
-                               PTHREAD_ALLOCADDR(stackaddr, stacksize), PTHREAD_ALLOCSIZE(stackaddr, stacksize));
-
-               if (pflags & PTHREAD_START_SETSCHED) {
-                       self->policy = ((pflags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK);
-                       self->param.sched_priority = (pflags & PTHREAD_START_IMPORTANCE_MASK);
-               }
-
-               if ((pflags & PTHREAD_START_DETACHED) == PTHREAD_START_DETACHED)  {
-                       self->detached &= ~PTHREAD_CREATE_JOINABLE;
-                       self->detached |= PTHREAD_CREATE_DETACHED;
-               }
-       }
-
-       if ((pflags & PTHREAD_START_QOSCLASS) != 0) {
-               /* The QoS class is cached in the TSD of the pthread, so to reflect the
-                * class that the kernel brought us up at, the TSD must be primed from the
-                * flags parameter.
-                */
-               self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (pflags & PTHREAD_START_QOSCLASS_MASK);
-       } else {
-               /* Give the thread a default QoS tier, of zero. */
-               self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
-       }
-
+_pthread_start(pthread_t self, mach_port_t kport,
+               __unused void *(*fun)(void *), __unused void *arg,
+               __unused size_t stacksize, unsigned int pflags)
+{
        bool thread_tsd_bsd_set = (bool)(pflags & PTHREAD_START_TSD_BASE_SET);
 
+       if (os_unlikely(pflags & PTHREAD_START_SUSPENDED)) {
+               PTHREAD_INTERNAL_CRASH(0,
+                               "kernel without PTHREAD_START_SUSPENDED support");
+       }
 #if DEBUG
        PTHREAD_ASSERT(MACH_PORT_VALID(kport));
        PTHREAD_ASSERT(_pthread_kernel_thread(self) == kport);
@@ -756,20 +868,13 @@ _pthread_start(pthread_t self,
        // will mark the thread initialized
        _pthread_markcancel_if_canceled(self, kport);
 
-       self->fun = fun;
-       self->arg = arg;
-
        _pthread_body(self, !thread_tsd_bsd_set);
 }
 
 PTHREAD_ALWAYS_INLINE
 static inline void
-_pthread_struct_init(pthread_t t,
-                    const pthread_attr_t *attrs,
-                    void *stackaddr,
-                    size_t stacksize,
-                    void *freeaddr,
-                    size_t freesize)
+_pthread_struct_init(pthread_t t, const pthread_attr_t *attrs,
+               void *stackaddr, size_t stacksize, void *freeaddr, size_t freesize)
 {
 #if DEBUG
        PTHREAD_ASSERT(t->sig != _PTHREAD_SIG);
@@ -777,23 +882,34 @@ _pthread_struct_init(pthread_t t,
 
        t->sig = _PTHREAD_SIG;
        t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = t;
-       t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
+       t->tsd[_PTHREAD_TSD_SLOT_ERRNO] = &t->err_no;
+       if (attrs->schedset == 0) {
+               t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = attrs->qosclass;
+       } else {
+               t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] =
+                               _pthread_unspecified_priority();
+       }
+       t->tsd[_PTHREAD_TSD_SLOT_PTR_MUNGE] = _pthread_ptr_munge_token;
+       t->tl_has_custom_stack = (attrs->stackaddr != NULL);
+
        _PTHREAD_LOCK_INIT(t->lock);
 
        t->stackaddr = stackaddr;
-       t->stacksize = stacksize;
+       t->stackbottom = stackaddr - stacksize;
        t->freeaddr = freeaddr;
        t->freesize = freesize;
 
-       t->guardsize = attrs->guardsize;
-       t->detached = attrs->detached;
+       t->guardsize = _pthread_attr_guardsize(attrs);
+       t->tl_joinable = (attrs->detached == PTHREAD_CREATE_JOINABLE);
        t->inherit = attrs->inherit;
-       t->policy = attrs->policy;
+       t->tl_policy = attrs->policy;
        t->schedset = attrs->schedset;
-       t->param = attrs->param;
+       _pthread_attr_get_schedparam(attrs, &t->tl_param);
        t->cancel_state = PTHREAD_CANCEL_ENABLE | PTHREAD_CANCEL_DEFERRED;
 }
 
+#pragma mark pthread public interface
+
 /* Need to deprecate this in future */
 int
 _pthread_is_threaded(void)
@@ -818,7 +934,7 @@ mach_port_t
 pthread_mach_thread_np(pthread_t t)
 {
        mach_port_t kport = MACH_PORT_NULL;
-       (void)_pthread_is_valid(t, 0, &kport);
+       (void)_pthread_is_valid(t, &kport);
        return kport;
 }
 
@@ -831,7 +947,7 @@ pthread_from_mach_thread_np(mach_port_t kernel_thread)
        /* No need to wait as mach port is already known */
        _PTHREAD_LOCK(_pthread_list_lock);
 
-       TAILQ_FOREACH(p, &__pthread_head, plist) {
+       TAILQ_FOREACH(p, &__pthread_head, tl_plist) {
                if (_pthread_kernel_thread(p) == kernel_thread) {
                        break;
                }
@@ -847,6 +963,7 @@ size_t
 pthread_get_stacksize_np(pthread_t t)
 {
        size_t size = 0;
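+       // Note: t is dereferenced on the next line before the NULL check below,
+       // so a NULL argument now faults instead of returning ESRCH (see XXX).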
+       size_t stacksize = t->stackaddr - t->stackbottom;
 
        if (t == NULL) {
                return ESRCH; // XXX bug?
@@ -863,7 +980,7 @@ pthread_get_stacksize_np(pthread_t t)
        //
        // Of course, on arm rlim_cur == rlim_max and there's only the one guard
        // page.  So, we can skip all this there.
-       if (t == &_thread && t->stacksize + vm_page_size != t->freesize) {
+       if (t == main_thread() && stacksize + vm_page_size != t->freesize) {
                // We want to call getrlimit() just once, as it's relatively expensive
                static size_t rlimit_stack;
 
@@ -877,55 +994,46 @@ pthread_get_stacksize_np(pthread_t t)
                }
 
                if (rlimit_stack == 0 || rlimit_stack > t->freesize) {
-                       return t->stacksize;
+                       return stacksize;
                } else {
                        return rlimit_stack;
                }
        }
 #endif /* !defined(__arm__) && !defined(__arm64__) */
 
-       if (t == pthread_self() || t == &_thread) {
-               return t->stacksize;
+       if (t == pthread_self() || t == main_thread()) {
+               size = stacksize;
+               goto out;
        }
 
-       _PTHREAD_LOCK(_pthread_list_lock);
-
-       if (_pthread_is_valid_locked(t)) {
-               size = t->stacksize;
+       if (_pthread_validate_thread_and_list_lock(t)) {
+               size = stacksize;
+               _PTHREAD_UNLOCK(_pthread_list_lock);
        } else {
                size = ESRCH; // XXX bug?
        }
 
-       _PTHREAD_UNLOCK(_pthread_list_lock);
-
-       return size;
+out:
+       // <rdar://problem/42588315> binary compatibility issues force us to return
+       // DEFAULT_STACK_SIZE here when we do not know the size of the stack
+       return size ? size : DEFAULT_STACK_SIZE;
 }
 
 PTHREAD_NOEXPORT_VARIANT
 void *
 pthread_get_stackaddr_np(pthread_t t)
 {
-       void *addr = NULL;
-
-       if (t == NULL) {
-               return (void *)(uintptr_t)ESRCH; // XXX bug?
-       }
-
        // since the main thread will not get de-allocated from underneath us
-       if (t == pthread_self() || t == &_thread) {
+       if (t == pthread_self() || t == main_thread()) {
                return t->stackaddr;
        }
 
-       _PTHREAD_LOCK(_pthread_list_lock);
-
-       if (_pthread_is_valid_locked(t)) {
-               addr = t->stackaddr;
-       } else {
-               addr = (void *)(uintptr_t)ESRCH; // XXX bug?
+       if (!_pthread_validate_thread_and_list_lock(t)) {
+               return (void *)(uintptr_t)ESRCH; // XXX bug?
        }
 
+       void *addr = t->stackaddr;
        _PTHREAD_UNLOCK(_pthread_list_lock);
-
        return addr;
 }
 
@@ -979,32 +1087,30 @@ _pthread_dealloc_special_reply_port(pthread_t t)
 {
        mach_port_t special_reply_port = _pthread_special_reply_port(t);
        if (special_reply_port != MACH_PORT_NULL) {
-               mach_port_mod_refs(mach_task_self(), special_reply_port,
-                               MACH_PORT_RIGHT_RECEIVE, -1);
+               thread_destruct_special_reply_port(special_reply_port,
+                               THREAD_SPECIAL_REPLY_PORT_ALL);
        }
 }
 
 pthread_t
 pthread_main_thread_np(void)
 {
-       return &_thread;
+       return main_thread();
 }
 
 /* returns non-zero if the current thread is the main thread */
 int
 pthread_main_np(void)
 {
-       pthread_t self = pthread_self();
-
-       return ((self->detached & _PTHREAD_CREATE_PARENT) == _PTHREAD_CREATE_PARENT);
+       return pthread_self() == main_thread();
 }
 
 
-/* if we are passed in a pthread_t that is NULL, then we return
-   the current thread's thread_id. So folks don't have to call
-   pthread_self, in addition to us doing it, if they just want
-   their thread_id.
-*/
+/*
+ * if we are passed in a pthread_t that is NULL, then we return the current
+ * thread's thread_id. So folks don't have to call pthread_self, in addition to
+ * us doing it, if they just want their thread_id.
+ */
 PTHREAD_NOEXPORT_VARIANT
 int
 pthread_threadid_np(pthread_t thread, uint64_t *thread_id)
@@ -1018,11 +1124,10 @@ pthread_threadid_np(pthread_t thread, uint64_t *thread_id)
 
        if (thread == NULL || thread == self) {
                *thread_id = self->thread_id;
+       } else if (!_pthread_validate_thread_and_list_lock(thread)) {
+               res = ESRCH;
        } else {
-               _PTHREAD_LOCK(_pthread_list_lock);
-               if (!_pthread_is_valid_locked(thread)) {
-                       res = ESRCH;
-               } else if (thread->thread_id == 0) {
+               if (thread->thread_id == 0) {
                        res = EINVAL;
                } else {
                        *thread_id = thread->thread_id;
@@ -1036,20 +1141,18 @@ PTHREAD_NOEXPORT_VARIANT
 int
 pthread_getname_np(pthread_t thread, char *threadname, size_t len)
 {
-       int res = 0;
+       if (thread == pthread_self()) {
+               strlcpy(threadname, thread->pthread_name, len);
+               return 0;
+       }
 
-       if (thread == NULL) {
+       if (!_pthread_validate_thread_and_list_lock(thread)) {
                return ESRCH;
        }
 
-       _PTHREAD_LOCK(_pthread_list_lock);
-       if (_pthread_is_valid_locked(thread)) {
-               strlcpy(threadname, thread->pthread_name, len);
-       } else {
-               res = ESRCH;
-       }
+       strlcpy(threadname, thread->pthread_name, len);
        _PTHREAD_UNLOCK(_pthread_list_lock);
-       return res;
+       return 0;
 }
 
 
@@ -1079,219 +1182,122 @@ pthread_setname_np(const char *name)
 
 PTHREAD_ALWAYS_INLINE
 static inline void
-__pthread_add_thread(pthread_t t, const pthread_attr_t *attrs,
-               bool parent, bool from_mach_thread)
+__pthread_add_thread(pthread_t t, bool from_mach_thread)
 {
-       bool should_deallocate = false;
-       bool should_add = true;
-
-       mach_port_t kport = _pthread_kernel_thread(t);
-       if (os_slowpath(!MACH_PORT_VALID(kport))) {
-               PTHREAD_CLIENT_CRASH(kport,
-                               "Unable to allocate thread port, possible port leak");
-       }
-
        if (from_mach_thread) {
                _PTHREAD_LOCK_FROM_MACH_THREAD(_pthread_list_lock);
        } else {
                _PTHREAD_LOCK(_pthread_list_lock);
        }
 
-       // The parent and child threads race to add the thread to the list.
-       // When called by the parent:
-       //  - set parentcheck to true
-       //  - back off if childrun is true
-       // When called by the child:
-       //  - set childrun to true
-       //  - back off if parentcheck is true
-       if (parent) {
-               t->parentcheck = 1;
-               if (t->childrun) {
-                       // child got here first, don't add.
-                       should_add = false;
-               }
+       TAILQ_INSERT_TAIL(&__pthread_head, t, tl_plist);
+       _pthread_count++;
 
-               // If the child exits before we check in then it has to keep
-               // the thread structure memory alive so our dereferences above
-               // are valid. If it's a detached thread, then no joiner will
-               // deallocate the thread structure itself. So we do it here.
-               if (t->childexit) {
-                       should_add = false;
-                       should_deallocate = ((t->detached & PTHREAD_CREATE_DETACHED) == PTHREAD_CREATE_DETACHED);
-               }
+       if (from_mach_thread) {
+               _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock);
        } else {
-               t->childrun = 1;
-               if (t->parentcheck) {
-                       // Parent got here first, don't add.
-                       should_add = false;
-               }
-               if (t->wqthread) {
-                       // Work queue threads have no parent. Simulate.
-                       t->parentcheck = 1;
-               }
+               _PTHREAD_UNLOCK(_pthread_list_lock);
        }
 
-       if (should_add) {
-               TAILQ_INSERT_TAIL(&__pthread_head, t, plist);
-               _pthread_count++;
-
-               /*
-                * Set some initial values which we know in the pthread structure in
-                * case folks try to get the values before the thread can set them.
-                */
-               if (parent && attrs && attrs->schedset == 0) {
-                       t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = attrs->qosclass;
-               }
+       if (!from_mach_thread) {
+               // PR-26275485: Mach threads will likely crash trying to run
+               // introspection code.  Since the fallout from the introspection
+               // code not seeing the injected thread is likely less than crashing
+               // in the introspection code, just don't make the call.
+               _pthread_introspection_thread_create(t);
        }
+}
 
-       if (from_mach_thread){
-               _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock);
+PTHREAD_ALWAYS_INLINE
+static inline void
+__pthread_undo_add_thread(pthread_t t, bool from_mach_thread)
+{
+       if (from_mach_thread) {
+               _PTHREAD_LOCK_FROM_MACH_THREAD(_pthread_list_lock);
        } else {
-               _PTHREAD_UNLOCK(_pthread_list_lock);
+               _PTHREAD_LOCK(_pthread_list_lock);
        }
 
-       if (parent) {
-               if (!from_mach_thread) {
-                       // PR-26275485: Mach threads will likely crash trying to run
-                       // introspection code.  Since the fall out from the introspection
-                       // code not seeing the injected thread is likely less than crashing
-                       // in the introspection code, just don't make the call.
-                       _pthread_introspection_thread_create(t, should_deallocate);
-               }
-               if (should_deallocate) {
-                       _pthread_deallocate(t);
-               }
+       TAILQ_REMOVE(&__pthread_head, t, tl_plist);
+       _pthread_count--;
+
+       if (from_mach_thread) {
+               _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock);
        } else {
-               _pthread_introspection_thread_start(t);
+               _PTHREAD_UNLOCK(_pthread_list_lock);
        }
 }
 
-// <rdar://problem/12544957> must always inline this function to avoid epilogues
-// Returns EBUSY if the thread structure should be kept alive (is joinable).
-// Returns ESRCH if the thread structure is no longer valid (was detached).
 PTHREAD_ALWAYS_INLINE
-static inline int
-__pthread_remove_thread(pthread_t t, bool child, bool *should_exit)
+static inline void
+__pthread_started_thread(pthread_t t)
 {
-       int ret = 0;
-
-       bool should_remove = true;
-
-       _PTHREAD_LOCK(_pthread_list_lock);
-
-       // When a thread removes itself:
-       //  - Set the childexit flag indicating that the thread has exited.
-       //  - Return false if parentcheck is zero (must keep structure)
-       //  - If the thread is joinable, keep it on the list so that
-       //    the join operation succeeds. Still decrement the running
-       //    thread count so that we exit if no threads are running.
-       //  - Update the running thread count.
-       // When another thread removes a joinable thread:
-       //  - CAREFUL not to dereference the thread before verifying that the
-       //    reference is still valid using _pthread_is_valid_locked().
-       //  - Remove the thread from the list.
-
-       if (child) {
-               t->childexit = 1;
-               if (t->parentcheck == 0) {
-                       ret = EBUSY;
-               }
-               if ((t->detached & PTHREAD_CREATE_JOINABLE) != 0) {
-                       ret = EBUSY;
-                       should_remove = false;
-               }
-               *should_exit = (--_pthread_count <= 0);
-       } else if (!_pthread_is_valid_locked(t)) {
-               ret = ESRCH;
-               should_remove = false;
-       } else if ((t->detached & PTHREAD_CREATE_JOINABLE) == 0) {
-               // If we found a thread but it's not joinable, bail.
-               ret = ESRCH;
-               should_remove = false;
-       } else if (t->parentcheck == 0) {
-               // If we're not the child thread *and* the parent has not finished
-               // creating the thread yet, then we are another thread that's joining
-               // and we cannot deallocate the pthread.
-               ret = EBUSY;
-       }
-       if (should_remove) {
-               TAILQ_REMOVE(&__pthread_head, t, plist);
+       mach_port_t kport = _pthread_kernel_thread(t);
+       if (os_slowpath(!MACH_PORT_VALID(kport))) {
+               PTHREAD_CLIENT_CRASH(kport,
+                               "Unable to allocate thread port, possible port leak");
        }
-
-       _PTHREAD_UNLOCK(_pthread_list_lock);
-
-       return ret;
+       _pthread_introspection_thread_start(t);
 }
 
+#define _PTHREAD_CREATE_NONE              0x0
+#define _PTHREAD_CREATE_FROM_MACH_THREAD  0x1
+#define _PTHREAD_CREATE_SUSPENDED         0x2
+
 static int
-_pthread_create(pthread_t *thread,
-       const pthread_attr_t *attr,
-       void *(*start_routine)(void *),
-       void *arg,
-       bool from_mach_thread)
+_pthread_create(pthread_t *thread, const pthread_attr_t *attrs,
+               void *(*start_routine)(void *), void *arg, unsigned int create_flags)
 {
        pthread_t t = NULL;
-       unsigned int flags = 0;
+       void *stack = NULL;
+       bool from_mach_thread = (create_flags & _PTHREAD_CREATE_FROM_MACH_THREAD);
 
-       pthread_attr_t *attrs = (pthread_attr_t *)attr;
        if (attrs == NULL) {
                attrs = &_pthread_attr_default;
        } else if (attrs->sig != _PTHREAD_ATTR_SIG) {
                return EINVAL;
        }
 
-       if (attrs->detached == PTHREAD_CREATE_DETACHED) {
-               flags |= PTHREAD_START_DETACHED;
-       }
-
+       unsigned int flags = PTHREAD_START_CUSTOM;
        if (attrs->schedset != 0) {
+               struct sched_param p;
+               _pthread_attr_get_schedparam(attrs, &p);
                flags |= PTHREAD_START_SETSCHED;
                flags |= ((attrs->policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT);
-               flags |= (attrs->param.sched_priority & PTHREAD_START_IMPORTANCE_MASK);
+               flags |= (p.sched_priority & PTHREAD_START_IMPORTANCE_MASK);
        } else if (attrs->qosclass != 0) {
                flags |= PTHREAD_START_QOSCLASS;
                flags |= (attrs->qosclass & PTHREAD_START_QOSCLASS_MASK);
        }
+       if (create_flags & _PTHREAD_CREATE_SUSPENDED) {
+               flags |= PTHREAD_START_SUSPENDED;
+       }
 
        __is_threaded = 1;
 
-       void *stack;
-
-       if (attrs->fastpath) {
-               // kernel will allocate thread and stack, pass stacksize.
-               stack = (void *)attrs->stacksize;
-       } else {
-               // allocate the thread and its stack
-               flags |= PTHREAD_START_CUSTOM;
-
-               int res;
-               res = _pthread_allocate(&t, attrs, &stack);
-               if (res) {
-                       return res;
-               }
-
-               t->arg = arg;
-               t->fun = start_routine;
+       t = _pthread_allocate(attrs, &stack);
+       if (t == NULL) {
+               return EAGAIN;
        }
 
-       pthread_t t2;
-       t2 = __bsdthread_create(start_routine, arg, stack, t, flags);
-       if (t2 == (pthread_t)-1) {
+       t->arg = arg;
+       t->fun = start_routine;
+       __pthread_add_thread(t, from_mach_thread);
+
+       if (__bsdthread_create(start_routine, arg, stack, t, flags) ==
+                       (pthread_t)-1) {
                if (errno == EMFILE) {
                        PTHREAD_CLIENT_CRASH(0,
                                        "Unable to allocate thread port, possible port leak");
                }
-               if (flags & PTHREAD_START_CUSTOM) {
-                       // free the thread and stack if we allocated it
-                       _pthread_deallocate(t);
-               }
+               __pthread_undo_add_thread(t, from_mach_thread);
+               _pthread_deallocate(t, from_mach_thread);
                return EAGAIN;
        }
-       if (t == NULL) {
-               t = t2;
-       }
 
-       __pthread_add_thread(t, attrs, true, from_mach_thread);
+       if (create_flags & _PTHREAD_CREATE_SUSPENDED) {
+               _pthread_markcancel_if_canceled(t, _pthread_kernel_thread(t));
+       }
 
        // n.b. if a thread is created detached and exits, t will be invalid
        *thread = t;
@@ -1299,78 +1305,87 @@ _pthread_create(pthread_t *thread,
 }
 
 int
-pthread_create(pthread_t *thread,
-       const pthread_attr_t *attr,
-       void *(*start_routine)(void *),
-       void *arg)
+pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+               void *(*start_routine)(void *), void *arg)
 {
-       return _pthread_create(thread, attr, start_routine, arg, false);
+       unsigned int flags = _PTHREAD_CREATE_NONE;
+       return _pthread_create(thread, attr, start_routine, arg, flags);
 }
 
 int
-pthread_create_from_mach_thread(pthread_t *thread,
-       const pthread_attr_t *attr,
-       void *(*start_routine)(void *),
-       void *arg)
+pthread_create_from_mach_thread(pthread_t *thread, const pthread_attr_t *attr,
+               void *(*start_routine)(void *), void *arg)
 {
-       return _pthread_create(thread, attr, start_routine, arg, true);
+       unsigned int flags = _PTHREAD_CREATE_FROM_MACH_THREAD;
+       return _pthread_create(thread, attr, start_routine, arg, flags);
 }
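
Both public creation routines now collapse into _pthread_create() with a create_flags word in place of the old bool. The caller-facing contract is unchanged, as in this standard create/join sketch (not part of this commit):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static void *start(void *arg)
{
        // Hand back the argument plus one as the exit value.
        return (void *)((uintptr_t)arg + 1);
}

int main(void)
{
        pthread_t t;
        void *result;
        if (pthread_create(&t, NULL, start, (void *)41) != 0) {
                return 1;
        }
        pthread_join(t, &result); // result == (void *)42
        printf("result: %p\n", result);
        return 0;
}
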
 
+#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288
+/* Functions defined in machine-dependent files. */
+PTHREAD_NOEXPORT void _pthread_setup_suspended(pthread_t th, void (*f)(pthread_t), void *sp);
+
 PTHREAD_NORETURN
 static void
 _pthread_suspended_body(pthread_t self)
 {
        _pthread_set_self(self);
-       __pthread_add_thread(self, NULL, false, false);
+       __pthread_started_thread(self);
        _pthread_exit(self, (self->fun)(self->arg));
 }
 
-int
-pthread_create_suspended_np(pthread_t *thread,
-       const pthread_attr_t *attr,
-       void *(*start_routine)(void *),
-       void *arg)
+static int
+_pthread_create_suspended_np(pthread_t *thread, const pthread_attr_t *attrs,
+               void *(*start_routine)(void *), void *arg)
 {
-       int res;
+       pthread_t t;
        void *stack;
        mach_port_t kernel_thread = MACH_PORT_NULL;
 
-       const pthread_attr_t *attrs = attr;
        if (attrs == NULL) {
                attrs = &_pthread_attr_default;
        } else if (attrs->sig != _PTHREAD_ATTR_SIG) {
                return EINVAL;
        }
 
-       pthread_t t;
-       res = _pthread_allocate(&t, attrs, &stack);
-       if (res) {
-               return res;
+       t = _pthread_allocate(attrs, &stack);
+       if (t == NULL) {
+               return EAGAIN;
        }
 
-       *thread = t;
-
-       kern_return_t kr;
-       kr = thread_create(mach_task_self(), &kernel_thread);
-       if (kr != KERN_SUCCESS) {
-               //PTHREAD_ABORT("thread_create() failed: %d", kern_res);
-               return EINVAL; /* Need better error here? */
+       if (thread_create(mach_task_self(), &kernel_thread) != KERN_SUCCESS) {
+               _pthread_deallocate(t, false);
+               return EAGAIN;
        }
 
        _pthread_set_kernel_thread(t, kernel_thread);
-       (void)pthread_setschedparam_internal(t, kernel_thread, t->policy, &t->param);
+       (void)pthread_setschedparam_internal(t, kernel_thread,
+                       t->tl_policy, &t->tl_param);
 
        __is_threaded = 1;
 
        t->arg = arg;
        t->fun = start_routine;
-
        t->cancel_state |= _PTHREAD_CANCEL_INITIALIZED;
-       __pthread_add_thread(t, NULL, true, false);
+       __pthread_add_thread(t, false);
 
        // Set up a suspended thread.
-       _pthread_setup(t, _pthread_suspended_body, stack, 1, 0);
-       return res;
+       _pthread_setup_suspended(t, _pthread_suspended_body, stack);
+       *thread = t;
+       return 0;
+}
+#endif // !defined(__OPEN_SOURCE__) && TARGET_OS_OSX
+
+int
+pthread_create_suspended_np(pthread_t *thread, const pthread_attr_t *attr,
+               void *(*start_routine)(void *), void *arg)
+{
+#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288
+       if (_os_xbs_chrooted) {
+               return _pthread_create_suspended_np(thread, attr, start_routine, arg);
+       }
+#endif
+       unsigned int flags = _PTHREAD_CREATE_SUSPENDED;
+       return _pthread_create(thread, attr, start_routine, arg, flags);
 }
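
Except for the chrooted-build fallback, pthread_create_suspended_np() now reuses _pthread_create() with PTHREAD_START_SUSPENDED, letting the kernel park the thread instead of userspace calling thread_create() and _pthread_setup_suspended(). A usage sketch, assuming the usual thread_resume() pattern for waking the suspended thread (illustrative only):

#include <pthread.h>
#include <mach/mach.h>
#include <stdio.h>

static void *body(void *arg)
{
        puts("running after resume");
        return NULL;
}

int main(void)
{
        pthread_t t;
        if (pthread_create_suspended_np(&t, NULL, body, NULL) == 0) {
                // The thread exists but has not started executing body();
                // kick it off with thread_resume(), then join as usual.
                thread_resume(pthread_mach_thread_np(t));
                pthread_join(t, NULL);
        }
        return 0;
}
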
 
 
@@ -1379,33 +1394,31 @@ int
 pthread_detach(pthread_t thread)
 {
        int res = 0;
-       bool join = false;
-       semaphore_t sema = SEMAPHORE_NULL;
+       bool join = false, wake = false;
 
-       if (!_pthread_is_valid(thread, PTHREAD_IS_VALID_LOCK_THREAD, NULL)) {
-               return ESRCH; // Not a valid thread to detach.
+       if (!_pthread_validate_thread_and_list_lock(thread)) {
+               return ESRCH;
        }
 
-       if ((thread->detached & PTHREAD_CREATE_DETACHED) ||
-                       !(thread->detached & PTHREAD_CREATE_JOINABLE)) {
+       if (!thread->tl_joinable) {
                res = EINVAL;
-       } else if (thread->detached & _PTHREAD_EXITED) {
+       } else if (thread->tl_exit_gate == MACH_PORT_DEAD) {
                // Join the thread if it's already exited.
                join = true;
        } else {
-               thread->detached &= ~PTHREAD_CREATE_JOINABLE;
-               thread->detached |= PTHREAD_CREATE_DETACHED;
-               sema = thread->joiner_notify;
+               thread->tl_joinable = false; // _pthread_joiner_prepost_wake uses this
+               if (thread->tl_join_ctx) {
+                       (void)_pthread_joiner_prepost_wake(thread);
+                       wake = true;
+               }
        }
-
-       _PTHREAD_UNLOCK(thread->lock);
+       _PTHREAD_UNLOCK(_pthread_list_lock);
 
        if (join) {
                pthread_join(thread, NULL);
-       } else if (sema) {
-               semaphore_signal(sema);
+       } else if (wake) {
+               _pthread_joiner_wake(thread);
        }
-
        return res;
 }
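
pthread_detach() now keys off tl_joinable and the tl_exit_gate port instead of detach-flag bits, waking any blocked joiner through the new _pthread_joiner_* machinery. The caller-visible semantics are unchanged, per this sketch (not part of this commit):

#include <pthread.h>
#include <unistd.h>

static void *work(void *arg)
{
        sleep(1);
        return NULL;
}

int main(void)
{
        pthread_t t;
        if (pthread_create(&t, NULL, work, NULL) == 0) {
                // After a successful detach the thread reclaims its own
                // storage on exit; joining it afterwards is undefined.
                pthread_detach(t);
        }
        sleep(2); // let the detached thread finish before process exit
        return 0;
}
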
 
@@ -1418,7 +1431,7 @@ pthread_kill(pthread_t th, int sig)
        }
 
        mach_port_t kport = MACH_PORT_NULL;
-       if (!_pthread_is_valid(th, 0, &kport)) {
+       if (!_pthread_is_valid(th, &kport)) {
                return ESRCH; // Not a valid thread.
        }
 
@@ -1452,18 +1465,19 @@ __pthread_workqueue_setkill(int enable)
 /* For compatibility... */
 
 pthread_t
-_pthread_self(void) {
+_pthread_self(void)
+{
        return pthread_self();
 }
 
 /*
  * Terminate a thread.
  */
-int __disable_threadsignal(int);
+extern int __disable_threadsignal(int);
 
 PTHREAD_NORETURN
 static void
-_pthread_exit(pthread_t self, void *value_ptr)
+_pthread_exit(pthread_t self, void *exit_value)
 {
        struct __darwin_pthread_handler_rec *handler;
 
@@ -1471,7 +1485,7 @@ _pthread_exit(pthread_t self, void *value_ptr)
        __disable_threadsignal(1);
 
        // Set cancel state to disable and type to deferred
-       _pthread_setcancelstate_exit(self, value_ptr, __unix_conforming);
+       _pthread_setcancelstate_exit(self, exit_value);
 
        while ((handler = self->__cleanup_stack) != 0) {
                (handler->__routine)(handler->__arg);
@@ -1479,71 +1493,44 @@ _pthread_exit(pthread_t self, void *value_ptr)
        }
        _pthread_tsd_cleanup(self);
 
-       _PTHREAD_LOCK(self->lock);
-       self->detached |= _PTHREAD_EXITED;
-       self->exit_value = value_ptr;
-
-       if ((self->detached & PTHREAD_CREATE_JOINABLE) &&
-                       self->joiner_notify == SEMAPHORE_NULL) {
-               self->joiner_notify = (semaphore_t)os_get_cached_semaphore();
-       }
-       _PTHREAD_UNLOCK(self->lock);
-
        // Clear per-thread semaphore cache
        os_put_cached_semaphore(SEMAPHORE_NULL);
 
-       _pthread_terminate_invoke(self);
+       _pthread_terminate_invoke(self, exit_value);
 }
 
 void
-pthread_exit(void *value_ptr)
+pthread_exit(void *exit_value)
 {
        pthread_t self = pthread_self();
-       if (self->wqthread == 0) {
-               _pthread_exit(self, value_ptr);
-       } else {
-               PTHREAD_ABORT("pthread_exit() may only be called against threads created via pthread_create()");
+       if (os_unlikely(self->wqthread)) {
+               PTHREAD_CLIENT_CRASH(0, "pthread_exit() called from a thread "
+                               "not created by pthread_create()");
        }
+       _pthread_exit(self, exit_value);
 }
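
pthread_exit() still unwinds the __cleanup_stack handlers before terminating; the change is that calling it from a workqueue thread is now a PTHREAD_CLIENT_CRASH rather than an abort message. A cleanup-handler example, illustrative only:

#include <pthread.h>
#include <stdio.h>

static void cleanup(void *arg)
{
        printf("cleanup: %s\n", (const char *)arg);
}

static void *worker(void *arg)
{
        pthread_cleanup_push(cleanup, "ran on exit");
        // pthread_exit() pops and runs the handler above, mirroring the
        // __cleanup_stack walk in _pthread_exit().
        pthread_exit((void *)42);
        pthread_cleanup_pop(0); // unreachable; balances the push macro
}

int main(void)
{
        pthread_t t;
        void *val;
        pthread_create(&t, NULL, worker, NULL);
        pthread_join(t, &val); // val == (void *)42
        return 0;
}
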
 
 
 PTHREAD_NOEXPORT_VARIANT
 int
-pthread_getschedparam(pthread_t thread,
-                     int *policy,
-                     struct sched_param *param)
+pthread_getschedparam(pthread_t thread, int *policy, struct sched_param *param)
 {
-       int ret = 0;
-
-       if (thread == NULL) {
+       if (!_pthread_validate_thread_and_list_lock(thread)) {
                return ESRCH;
        }
 
-       _PTHREAD_LOCK(_pthread_list_lock);
-
-       if (_pthread_is_valid_locked(thread)) {
-               if (policy) {
-                       *policy = thread->policy;
-               }
-               if (param) {
-                       *param = thread->param;
-               }
-       } else {
-               ret = ESRCH;
-       }
-
+       if (policy) *policy = thread->tl_policy;
+       if (param) *param = thread->tl_param;
        _PTHREAD_UNLOCK(_pthread_list_lock);
-
-       return ret;
+       return 0;
 }
 
 
+
 PTHREAD_ALWAYS_INLINE
 static inline int
-pthread_setschedparam_internal(pthread_t thread,
-                     mach_port_t kport,
-                     int policy,
-                     const struct sched_param *param)
+pthread_setschedparam_internal(pthread_t thread, mach_port_t kport, int policy,
+               const struct sched_param *param)
 {
        policy_base_data_t bases;
        policy_base_t base;
@@ -1575,41 +1562,37 @@ pthread_setschedparam_internal(pthread_t thread,
        return (ret != KERN_SUCCESS) ? EINVAL : 0;
 }
 
-
 PTHREAD_NOEXPORT_VARIANT
 int
 pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param)
 {
        mach_port_t kport = MACH_PORT_NULL;
-       int res;
        int bypass = 1;
 
        // since the main thread will not get de-allocated from underneath us
-       if (t == pthread_self() || t == &_thread) {
+       if (t == pthread_self() || t == main_thread()) {
                kport = _pthread_kernel_thread(t);
        } else {
                bypass = 0;
-               (void)_pthread_is_valid(t, 0, &kport);
+               if (!_pthread_is_valid(t, &kport)) {
+                       return ESRCH;
+               }
        }
 
-       res = pthread_setschedparam_internal(t, kport, policy, param);
-       if (res == 0) {
-               if (bypass == 0) {
-                       // Ensure the thread is still valid.
-                       _PTHREAD_LOCK(_pthread_list_lock);
-                       if (_pthread_is_valid_locked(t)) {
-                               t->policy = policy;
-                               t->param = *param;
-                       } else {
-                               res = ESRCH;
-                       }
-                       _PTHREAD_UNLOCK(_pthread_list_lock);
-               }  else {
-                       t->policy = policy;
-                       t->param = *param;
-               }
+       int res = pthread_setschedparam_internal(t, kport, policy, param);
+       if (res) return res;
+
+       if (bypass) {
+               _PTHREAD_LOCK(_pthread_list_lock);
+       } else if (!_pthread_validate_thread_and_list_lock(t)) {
+               // Ensure the thread is still valid.
+               return ESRCH;
        }
-       return res;
+
+       t->tl_policy = policy;
+       t->tl_param = *param;
+       _PTHREAD_UNLOCK(_pthread_list_lock);
+       return 0;
 }
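
pthread_setschedparam() now validates the target before the syscall and stores the result in the tl_ fields under the list lock. The caller-facing pattern remains the POSIX one, sketched here (not part of this commit):

#include <pthread.h>
#include <stdio.h>

int main(void)
{
        int policy;
        struct sched_param param;
        pthread_t self = pthread_self();

        if (pthread_getschedparam(self, &policy, &param) != 0) {
                return 1;
        }
        param.sched_priority += 1; // stay within the policy's range
        if (pthread_setschedparam(self, policy, &param) != 0) {
                fprintf(stderr, "pthread_setschedparam failed\n");
        }
        return 0;
}
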
 
 
@@ -1639,25 +1622,49 @@ PTHREAD_NOINLINE
 void
 _pthread_set_self(pthread_t p)
 {
-       return _pthread_set_self_internal(p, true);
+#if VARIANT_DYLD
+       if (os_likely(!p)) {
+               return _pthread_set_self_dyld();
+       }
+#endif // VARIANT_DYLD
+       _pthread_set_self_internal(p, true);
 }
 
-PTHREAD_ALWAYS_INLINE
-static inline void
-_pthread_set_self_internal(pthread_t p, bool needs_tsd_base_set)
+#if VARIANT_DYLD
+// _pthread_set_self_dyld is noinline+noexport to allow the option for
+// static libsyscall to adopt this as the entry point from mach_init if
+// desired
+PTHREAD_NOINLINE PTHREAD_NOEXPORT
+void
+_pthread_set_self_dyld(void)
 {
-       if (p == NULL) {
-               p = &_thread;
-       }
+       pthread_t p = main_thread();
+       p->thread_id = __thread_selfid();
 
-       uint64_t tid = __thread_selfid();
-       if (tid == -1ull) {
-               PTHREAD_ABORT("failed to set thread_id");
+       if (os_unlikely(p->thread_id == -1ull)) {
+               PTHREAD_INTERNAL_CRASH(0, "failed to set thread_id");
        }
 
+       // <rdar://problem/40930651> pthread self and the errno address are the
+       // bare minimum TSD setup that dyld needs to actually function.  Without
+       // this, TSD access will fail and crash if it uses bits of Libc prior to
+       // library initialization. __pthread_init will finish the initialization
+       // during library init.
        p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = p;
        p->tsd[_PTHREAD_TSD_SLOT_ERRNO] = &p->err_no;
-       p->thread_id = tid;
+       _thread_set_tsd_base(&p->tsd[0]);
+}
+#endif // VARIANT_DYLD
+
+PTHREAD_ALWAYS_INLINE
+static inline void
+_pthread_set_self_internal(pthread_t p, bool needs_tsd_base_set)
+{
+       p->thread_id = __thread_selfid();
+
+       if (os_unlikely(p->thread_id == -1ull)) {
+               PTHREAD_INTERNAL_CRASH(0, "failed to set thread_id");
+       }
 
        if (needs_tsd_base_set) {
                _thread_set_tsd_base(&p->tsd[0]);
@@ -1719,6 +1726,32 @@ pthread_setconcurrency(int new_level)
        return 0;
 }
 
+#if !defined(VARIANT_STATIC)
+void *
+malloc(size_t sz)
+{
+       if (_pthread_malloc) {
+               return _pthread_malloc(sz);
+       } else {
+               return NULL;
+       }
+}
+
+void
+free(void *p)
+{
+       if (_pthread_free) {
+               _pthread_free(p);
+       }
+}
+#endif // VARIANT_STATIC
+
+/*
+ * Perform package initialization - called automatically when application starts
+ */
+struct ProgramVars; /* forward reference */
+
+#if !VARIANT_DYLD
 static unsigned long
 _pthread_strtoul(const char *p, const char **endptr, int base)
 {
@@ -1777,36 +1810,29 @@ out:
        return ret;
 }
 
-#if !defined(VARIANT_STATIC)
-void *
-malloc(size_t sz)
+static void
+parse_ptr_munge_params(const char *envp[], const char *apple[])
 {
-       if (_pthread_malloc) {
-               return _pthread_malloc(sz);
-       } else {
-               return NULL;
+       const char *p, *s;
+       p = _simple_getenv(apple, "ptr_munge");
+       if (p) {
+               _pthread_ptr_munge_token = _pthread_strtoul(p, &s, 16);
+               bzero((char *)p, strlen(p));
        }
-}
-
-void
-free(void *p)
-{
-       if (_pthread_free) {
-               _pthread_free(p);
+#if !DEBUG
+       if (_pthread_ptr_munge_token) return;
+#endif
+       p = _simple_getenv(envp, "PTHREAD_PTR_MUNGE_TOKEN");
+       if (p) {
+               uintptr_t t = _pthread_strtoul(p, &s, 16);
+               if (t) _pthread_ptr_munge_token = t;
        }
 }
-#endif // VARIANT_STATIC
-
-/*
- * Perform package initialization - called automatically when application starts
- */
-struct ProgramVars; /* forward reference */
 
 int
 __pthread_init(const struct _libpthread_functions *pthread_funcs,
-              const char *envp[] __unused,
-              const char *apple[],
-              const struct ProgramVars *vars __unused)
+               const char *envp[], const char *apple[],
+               const struct ProgramVars *vars __unused)
 {
        // Save our provided pushed-down functions
        if (pthread_funcs) {
@@ -1829,11 +1855,11 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs,
        host_t host = mach_host_self();
        kr = host_info(host, flavor, (host_info_t)&priority_info, &count);
        if (kr != KERN_SUCCESS) {
-               PTHREAD_ABORT("host_info(mach_host_self(), ...) failed: %s", mach_error_string(kr));
+               PTHREAD_INTERNAL_CRASH(kr, "host_info() failed");
        } else {
-               default_priority = priority_info.user_priority;
-               min_priority = priority_info.minimum_priority;
-               max_priority = priority_info.maximum_priority;
+               default_priority = (uint8_t)priority_info.user_priority;
+               min_priority = (uint8_t)priority_info.minimum_priority;
+               max_priority = (uint8_t)priority_info.maximum_priority;
        }
        mach_port_deallocate(mach_task_self(), host);
 
@@ -1863,12 +1889,22 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs,
                allocsize = 0;
        }
 
-       pthread_t thread = &_thread;
-       pthread_attr_init(&_pthread_attr_default);
+       // Initialize random ptr_munge token from the kernel.
+       parse_ptr_munge_params(envp, apple);
+
+       // libpthread.a in dyld "owns" the main thread structure itself and sets
+       // up the tsd to point to it. So take the pthread_self() from there
+       // and make it our main thread pointer.
+       pthread_t thread = (pthread_t)_pthread_getspecific_direct(
+                       _PTHREAD_TSD_SLOT_PTHREAD_SELF);
+       PTHREAD_ASSERT(thread);
+       _main_thread_ptr = thread;
+
+       PTHREAD_ASSERT(_pthread_attr_default.qosclass ==
+                       _pthread_default_priority(0));
        _pthread_struct_init(thread, &_pthread_attr_default,
-                            stackaddr, stacksize,
-                            allocaddr, allocsize);
-       thread->detached = PTHREAD_CREATE_JOINABLE;
+                       stackaddr, stacksize, allocaddr, allocsize);
+       thread->tl_joinable = true;
 
        // Finish initialization with common code that is reinvoked on the
        // child side of a fork.
@@ -1897,66 +1933,35 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs,
 
        return 0;
 }
+#endif // !VARIANT_DYLD
 
 PTHREAD_NOEXPORT void
 _pthread_main_thread_init(pthread_t p)
 {
        TAILQ_INIT(&__pthread_head);
        _PTHREAD_LOCK_INIT(_pthread_list_lock);
-
-       // Re-use the main thread's static storage if no thread was provided.
-       if (p == NULL) {
-               if (_thread.tsd[0] != 0) {
-                       bzero(&_thread, sizeof(struct _pthread));
-               }
-               p = &_thread;
-       }
-
        _PTHREAD_LOCK_INIT(p->lock);
        _pthread_set_kernel_thread(p, mach_thread_self());
        _pthread_set_reply_port(p, mach_reply_port());
        p->__cleanup_stack = NULL;
-       p->joiner_notify = SEMAPHORE_NULL;
-       p->joiner = MACH_PORT_NULL;
-       p->detached |= _PTHREAD_CREATE_PARENT;
+       p->tl_join_ctx = NULL;
+       p->tl_exit_gate = MACH_PORT_NULL;
        p->tsd[__TSD_SEMAPHORE_CACHE] = (void*)SEMAPHORE_NULL;
+       p->tsd[__TSD_MACH_SPECIAL_REPLY] = 0;
        p->cancel_state |= _PTHREAD_CANCEL_INITIALIZED;
 
        // Initialize the list of threads with the new main thread.
-       TAILQ_INSERT_HEAD(&__pthread_head, p, plist);
+       TAILQ_INSERT_HEAD(&__pthread_head, p, tl_plist);
        _pthread_count = 1;
 
-       _pthread_set_self(p);
        _pthread_introspection_thread_start(p);
 }
 
-int
-_pthread_join_cleanup(pthread_t thread, void ** value_ptr, int conforming)
-{
-       int ret = __pthread_remove_thread(thread, false, NULL);
-       if (ret != 0 && ret != EBUSY) {
-               // Returns ESRCH if the thread was not created joinable.
-               return ret;
-       }
-
-       if (value_ptr) {
-               *value_ptr = _pthread_get_exit_value(thread, conforming);
-       }
-       _pthread_introspection_thread_destroy(thread);
-       if (ret != EBUSY) {
-               // __pthread_remove_thread returns EBUSY if the parent has not
-               // finished creating the thread (and is still expecting the pthread_t
-               // to be alive).
-               _pthread_deallocate(thread);
-       }
-       return 0;
-}
-
 int
 sched_yield(void)
 {
-    swtch_pri(0);
-    return 0;
+       swtch_pri(0);
+       return 0;
 }
 
 // XXX remove
@@ -1974,22 +1979,25 @@ pthread_yield_np(void)
 
 
 
+// Libsystem knows about this symbol and exports it to libsyscall
 PTHREAD_NOEXPORT_VARIANT
 void
 _pthread_clear_qos_tsd(mach_port_t thread_port)
 {
        if (thread_port == MACH_PORT_NULL || (uintptr_t)_pthread_getspecific_direct(_PTHREAD_TSD_SLOT_MACH_THREAD_SELF) == thread_port) {
                /* Clear the current thread's TSD, that can be done inline. */
-               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0));
+               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS,
+                               _pthread_unspecified_priority());
        } else {
                pthread_t p;
 
                _PTHREAD_LOCK(_pthread_list_lock);
 
-               TAILQ_FOREACH(p, &__pthread_head, plist) {
+               TAILQ_FOREACH(p, &__pthread_head, tl_plist) {
                        mach_port_t kp = _pthread_kernel_thread(p);
                        if (thread_port == kp) {
-                               p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
+                               p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] =
+                                               _pthread_unspecified_priority();
                                break;
                        }
                }
@@ -1999,7 +2007,35 @@ _pthread_clear_qos_tsd(mach_port_t thread_port)
 }
 
 
-/***** pthread workqueue support routines *****/
+#pragma mark pthread/stack_np.h public interface
+
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__arm64__)
+typedef uintptr_t frame_data_addr_t;
+
+struct frame_data {
+       frame_data_addr_t frame_addr_next;
+       frame_data_addr_t ret_addr;
+};
+#else
+#error ********** Unimplemented architecture
+#endif
+
+uintptr_t
+pthread_stack_frame_decode_np(uintptr_t frame_addr, uintptr_t *return_addr)
+{
+       struct frame_data *frame = (struct frame_data *)frame_addr;
+
+       if (return_addr) {
+               *return_addr = (uintptr_t)frame->ret_addr;
+       }
+
+       return (uintptr_t)frame->frame_addr_next;
+}
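
pthread_stack_frame_decode_np() is the new <pthread/stack_np.h> interface: given a frame pointer, it returns the caller's frame pointer and optionally writes the return address. A bounded frame-walk sketch, assuming the build keeps frame pointers (illustrative only):

#include <pthread/stack_np.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uintptr_t frame = (uintptr_t)__builtin_frame_address(0);
        for (int depth = 0; depth < 16 && frame != 0; depth++) {
                uintptr_t ret = 0;
                // Step to the caller's frame, capturing our return address.
                frame = pthread_stack_frame_decode_np(frame, &ret);
                if (ret == 0) {
                        break;
                }
                printf("%2d: return address %p\n", depth, (void *)ret);
        }
        return 0;
}
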
+
+
+#pragma mark pthread workqueue support routines
+
 
 PTHREAD_NOEXPORT void
 _pthread_bsdthread_init(struct _pthread_registration_data *data)
@@ -2011,19 +2047,18 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data)
        data->tsd_offset = offsetof(struct _pthread, tsd);
        data->mach_thread_self_offset = __TSD_MACH_THREAD_SELF * sizeof(void *);
 
-       int rv = __bsdthread_register(thread_start,
-                       start_wqthread, (int)PTHREAD_SIZE,
-                       (void*)data, (uintptr_t)sizeof(*data),
-                       data->dispatch_queue_offset);
+       int rv = __bsdthread_register(thread_start, start_wqthread, (int)PTHREAD_SIZE,
+                       (void*)data, (uintptr_t)sizeof(*data), data->dispatch_queue_offset);
 
        if (rv > 0) {
-               if ((rv & PTHREAD_FEATURE_QOS_DEFAULT) == 0) {
-                       PTHREAD_INTERNAL_CRASH(rv,
-                                       "Missing required support for QOS_CLASS_DEFAULT");
-               }
-               if ((rv & PTHREAD_FEATURE_QOS_MAINTENANCE) == 0) {
-                       PTHREAD_INTERNAL_CRASH(rv,
-                                       "Missing required support for QOS_CLASS_MAINTENANCE");
+               int required_features =
+                               PTHREAD_FEATURE_FINEPRIO |
+                               PTHREAD_FEATURE_BSDTHREADCTL |
+                               PTHREAD_FEATURE_SETSELF |
+                               PTHREAD_FEATURE_QOS_MAINTENANCE |
+                               PTHREAD_FEATURE_QOS_DEFAULT;
+               if ((rv & required_features) != required_features) {
+                       PTHREAD_INTERNAL_CRASH(rv, "Missing required kernel support");
                }
                __pthread_supported_features = rv;
        }
@@ -2039,9 +2074,13 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data)
 
        pthread_priority_t main_qos = (pthread_priority_t)data->main_qos;
 
-       if (_pthread_priority_get_qos_newest(main_qos) != QOS_CLASS_UNSPECIFIED) {
+       if (_pthread_priority_thread_qos(main_qos) != THREAD_QOS_UNSPECIFIED) {
                _pthread_set_main_qos(main_qos);
-               _thread.tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = main_qos;
+               main_thread()->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = main_qos;
+       }
+
+       if (data->stack_addr_hint) {
+               __pthread_stack_hint = data->stack_addr_hint;
        }
 
        if (__libdispatch_workerfunction != NULL) {
@@ -2050,191 +2089,188 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data)
        }
 }
 
-// workqueue entry point from kernel
-PTHREAD_NORETURN
-void
-_pthread_wqthread(pthread_t self, mach_port_t kport, void *stacklowaddr, void *keventlist, int flags, int nkevents)
+PTHREAD_NOINLINE
+static void
+_pthread_wqthread_legacy_worker_wrap(pthread_priority_t pp)
+{
+       /* Old thread priorities are inverted from where we have them in
+        * the new flexible priority scheme. The highest priority is zero,
+        * up to 2, with background at 3.
+        */
+       pthread_workqueue_function_t func = (pthread_workqueue_function_t)__libdispatch_workerfunction;
+       bool overcommit = (pp & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG);
+       int opts = overcommit ? WORKQ_ADDTHREADS_OPTION_OVERCOMMIT : 0;
+
+       switch (_pthread_priority_thread_qos(pp)) {
+       case THREAD_QOS_USER_INITIATED:
+               return (*func)(WORKQ_HIGH_PRIOQUEUE, opts, NULL);
+       case THREAD_QOS_LEGACY:
+               /* B&I builders can't pass a QOS_CLASS_DEFAULT thread to dispatch, for fear of the QoS being
+                * picked up by NSThread (et al) and transported around the system. So change the TSD to
+                * make this thread look like QOS_CLASS_USER_INITIATED even though it will still run as legacy.
+                */
+               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS,
+                               _pthread_priority_make_from_thread_qos(THREAD_QOS_USER_INITIATED, 0, 0));
+               return (*func)(WORKQ_DEFAULT_PRIOQUEUE, opts, NULL);
+       case THREAD_QOS_UTILITY:
+               return (*func)(WORKQ_LOW_PRIOQUEUE, opts, NULL);
+       case THREAD_QOS_BACKGROUND:
+               return (*func)(WORKQ_BG_PRIOQUEUE, opts, NULL);
+       }
+       PTHREAD_INTERNAL_CRASH(pp, "Invalid pthread priority for the legacy interface");
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline pthread_priority_t
+_pthread_wqthread_priority(int flags)
 {
-       PTHREAD_ASSERT(flags & WQ_FLAG_THREAD_NEWSPI);
+       pthread_priority_t pp = 0;
+       thread_qos_t qos;
+
+       if (flags & WQ_FLAG_THREAD_KEVENT) {
+               pp |= _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG;
+       }
+       if (flags & WQ_FLAG_THREAD_EVENT_MANAGER) {
+               return pp | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
+       }
 
-       bool thread_reuse = flags & WQ_FLAG_THREAD_REUSE;
-       bool overcommit = flags & WQ_FLAG_THREAD_OVERCOMMIT;
-       bool kevent = flags & WQ_FLAG_THREAD_KEVENT;
-       bool workloop = (flags & WQ_FLAG_THREAD_WORKLOOP) &&
-                       __libdispatch_workloopfunction != NULL;
-       PTHREAD_ASSERT((!kevent) || (__libdispatch_keventfunction != NULL));
-       PTHREAD_ASSERT(!workloop || kevent);
+       if (flags & WQ_FLAG_THREAD_OVERCOMMIT) {
+               pp |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
+       }
+       if (flags & WQ_FLAG_THREAD_PRIO_QOS) {
+               qos = (thread_qos_t)(flags & WQ_FLAG_THREAD_PRIO_MASK);
+               pp = _pthread_priority_make_from_thread_qos(qos, 0, pp);
+       } else if (flags & WQ_FLAG_THREAD_PRIO_SCHED) {
+               pp |= _PTHREAD_PRIORITY_SCHED_PRI_MASK;
+               pp |= (flags & WQ_FLAG_THREAD_PRIO_MASK);
+       } else {
+               PTHREAD_INTERNAL_CRASH(flags, "Missing priority");
+       }
+       return pp;
+}
 
-       pthread_priority_t priority = 0;
-       unsigned long priority_flags = 0;
+PTHREAD_NOINLINE
+static void
+_pthread_wqthread_setup(pthread_t self, mach_port_t kport, void *stacklowaddr,
+               int flags)
+{
+       void *stackaddr = self;
+       size_t stacksize = (uintptr_t)self - (uintptr_t)stacklowaddr;
 
-       if (overcommit)
-               priority_flags |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
-       if (flags & WQ_FLAG_THREAD_EVENT_MANAGER)
-               priority_flags |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-       if (kevent)
-               priority_flags |= _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG;
+       _pthread_struct_init(self, &_pthread_attr_default, stackaddr, stacksize,
+                       PTHREAD_ALLOCADDR(stackaddr, stacksize),
+                       PTHREAD_ALLOCSIZE(stackaddr, stacksize));
 
-       int thread_class = flags & WQ_FLAG_THREAD_PRIOMASK;
-       priority = _pthread_priority_make_newest(thread_class, 0, priority_flags);
+       _pthread_set_kernel_thread(self, kport);
+       self->wqthread = 1;
+       self->wqkillset = 0;
+       self->tl_joinable = false;
+       self->cancel_state |= _PTHREAD_CANCEL_INITIALIZED;
 
-       if (!thread_reuse) {
-               // New thread created by kernel, needs initialization.
-               void *stackaddr = self;
-               size_t stacksize = (uintptr_t)self - (uintptr_t)stacklowaddr;
+       // Update the running thread count and set childrun bit.
+       bool thread_tsd_base_set = (bool)(flags & WQ_FLAG_THREAD_TSD_BASE_SET);
+       _pthread_set_self_internal(self, !thread_tsd_base_set);
+       __pthread_add_thread(self, false);
+       __pthread_started_thread(self);
+}
 
-               _pthread_struct_init(self, &_pthread_attr_default,
-                                                        stackaddr, stacksize,
-                                                        PTHREAD_ALLOCADDR(stackaddr, stacksize), PTHREAD_ALLOCSIZE(stackaddr, stacksize));
+PTHREAD_NORETURN PTHREAD_NOINLINE
+static void
+_pthread_wqthread_exit(pthread_t self)
+{
+       pthread_priority_t pp;
+       thread_qos_t qos;
 
-               _pthread_set_kernel_thread(self, kport);
-               self->wqthread = 1;
-               self->wqkillset = 0;
-               self->cancel_state |= _PTHREAD_CANCEL_INITIALIZED;
+       pp = (pthread_priority_t)self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS];
+       qos = _pthread_priority_thread_qos(pp);
+       if (qos == THREAD_QOS_UNSPECIFIED || qos > WORKQ_THREAD_QOS_CLEANUP) {
+               // Reset QoS to something low for the cleanup process
+               pp = _pthread_priority_make_from_thread_qos(WORKQ_THREAD_QOS_CLEANUP, 0, 0);
+               self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (void *)pp;
+       }
 
-               // Not a joinable thread.
-               self->detached &= ~PTHREAD_CREATE_JOINABLE;
-               self->detached |= PTHREAD_CREATE_DETACHED;
+       _pthread_exit(self, NULL);
+}
 
-               // Update the running thread count and set childrun bit.
-               bool thread_tsd_base_set = (bool)(flags & WQ_FLAG_THREAD_TSD_BASE_SET);
-               _pthread_set_self_internal(self, !thread_tsd_base_set);
-               _pthread_introspection_thread_create(self, false);
-               __pthread_add_thread(self, NULL, false, false);
+// workqueue entry point from kernel
+void
+_pthread_wqthread(pthread_t self, mach_port_t kport, void *stacklowaddr,
+               void *keventlist, int flags, int nkevents)
+{
+       if ((flags & WQ_FLAG_THREAD_REUSE) == 0) {
+               _pthread_wqthread_setup(self, kport, stacklowaddr, flags);
        }
 
-       // If we're running with fine-grained priority, we also need to
-       // set this thread to have the QoS class provided to use by the kernel
-       if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) {
-               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(thread_class, 0, priority_flags));
+       pthread_priority_t pp;
+       if (flags & WQ_FLAG_THREAD_OUTSIDEQOS) {
+               self->wqoutsideqos = 1;
+               pp = _pthread_priority_make_from_thread_qos(THREAD_QOS_LEGACY, 0,
+                               _PTHREAD_PRIORITY_FALLBACK_FLAG);
+       } else {
+               self->wqoutsideqos = 0;
+               pp = _pthread_wqthread_priority(flags);
        }
 
-#if WQ_DEBUG
-       PTHREAD_ASSERT(self);
-       PTHREAD_ASSERT(self == pthread_self());
-#endif // WQ_DEBUG
+       self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (void *)pp;
 
-       if (workloop) {
+       // avoid spills on the stack; try hard to keep used stack space minimal
+       if (nkevents == WORKQ_EXIT_THREAD_NKEVENT) {
+               goto exit;
+       } else if (flags & WQ_FLAG_THREAD_WORKLOOP) {
                self->fun = (void *(*)(void*))__libdispatch_workloopfunction;
-       } else if (kevent){
+               self->wq_retop = WQOPS_THREAD_WORKLOOP_RETURN;
+               self->wq_kqid_ptr = ((kqueue_id_t *)keventlist - 1);
+               self->arg = keventlist;
+               self->wq_nevents = nkevents;
+       } else if (flags & WQ_FLAG_THREAD_KEVENT) {
                self->fun = (void *(*)(void*))__libdispatch_keventfunction;
+               self->wq_retop = WQOPS_THREAD_KEVENT_RETURN;
+               self->wq_kqid_ptr = NULL;
+               self->arg = keventlist;
+               self->wq_nevents = nkevents;
        } else {
                self->fun = (void *(*)(void*))__libdispatch_workerfunction;
+               self->wq_retop = WQOPS_THREAD_RETURN;
+               self->wq_kqid_ptr = NULL;
+               self->arg = (void *)(uintptr_t)pp;
+               self->wq_nevents = 0;
+               if (os_likely(__workq_newapi)) {
+                       (*__libdispatch_workerfunction)(pp);
+               } else {
+                       _pthread_wqthread_legacy_worker_wrap(pp);
+               }
+               goto just_return;
        }
-       self->arg = (void *)(uintptr_t)thread_class;
-
-       if (kevent && keventlist && nkevents > 0){
-               int errors_out;
-       kevent_errors_retry:
 
-               if (workloop) {
-                       kqueue_id_t kevent_id = *(kqueue_id_t*)((char*)keventlist - sizeof(kqueue_id_t));
-                       kqueue_id_t kevent_id_in = kevent_id;
-                       (__libdispatch_workloopfunction)(&kevent_id, &keventlist, &nkevents);
-                       PTHREAD_ASSERT(kevent_id == kevent_id_in || nkevents == 0);
-                       errors_out = __workq_kernreturn(WQOPS_THREAD_WORKLOOP_RETURN, keventlist, nkevents, 0);
+       if (nkevents > 0) {
+kevent_errors_retry:
+               if (self->wq_retop == WQOPS_THREAD_WORKLOOP_RETURN) {
+                       ((pthread_workqueue_function_workloop_t)self->fun)
+                                       (self->wq_kqid_ptr, &self->arg, &self->wq_nevents);
                } else {
-                       (__libdispatch_keventfunction)(&keventlist, &nkevents);
-                       errors_out = __workq_kernreturn(WQOPS_THREAD_KEVENT_RETURN, keventlist, nkevents, 0);
+                       ((pthread_workqueue_function_kevent_t)self->fun)
+                                       (&self->arg, &self->wq_nevents);
                }
-
-               if (errors_out > 0){
-                       nkevents = errors_out;
+               int rc = __workq_kernreturn(self->wq_retop, self->arg, self->wq_nevents, 0);
+               if (os_unlikely(rc > 0)) {
+                       self->wq_nevents = rc;
                        goto kevent_errors_retry;
-               } else if (errors_out < 0){
-                       PTHREAD_ABORT("kevent return produced an error: %d", errno);
-               }
-               goto thexit;
-    } else if (kevent){
-               if (workloop) {
-                       (__libdispatch_workloopfunction)(0, NULL, NULL);
-                       __workq_kernreturn(WQOPS_THREAD_WORKLOOP_RETURN, NULL, 0, -1);
-               } else {
-                       (__libdispatch_keventfunction)(NULL, NULL);
-                       __workq_kernreturn(WQOPS_THREAD_KEVENT_RETURN, NULL, 0, 0);
                }
-
-               goto thexit;
-    }
-
-       if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) {
-               if (!__workq_newapi) {
-                       /* Old thread priorities are inverted from where we have them in
-                        * the new flexible priority scheme. The highest priority is zero,
-                        * up to 2, with background at 3.
-                        */
-                       pthread_workqueue_function_t func = (pthread_workqueue_function_t)__libdispatch_workerfunction;
-
-                       int opts = overcommit ? WORKQ_ADDTHREADS_OPTION_OVERCOMMIT : 0;
-
-                       if ((__pthread_supported_features & PTHREAD_FEATURE_QOS_DEFAULT) == 0) {
-                               /* Dirty hack to support kernels that don't have QOS_CLASS_DEFAULT. */
-                               switch (thread_class) {
-                                       case QOS_CLASS_USER_INTERACTIVE:
-                                               thread_class = QOS_CLASS_USER_INITIATED;
-                                               break;
-                                       case QOS_CLASS_USER_INITIATED:
-                                               thread_class = QOS_CLASS_DEFAULT;
-                                               break;
-                                       default:
-                                               break;
-                               }
-                       }
-
-                       switch (thread_class) {
-                               /* QOS_CLASS_USER_INTERACTIVE is not currently requested by for old dispatch priority compatibility */
-                               case QOS_CLASS_USER_INITIATED:
-                                       (*func)(WORKQ_HIGH_PRIOQUEUE, opts, NULL);
-                                       break;
-
-                               case QOS_CLASS_DEFAULT:
-                                       /* B&I builders can't pass a QOS_CLASS_DEFAULT thread to dispatch, for fear of the QoS being
-                                        * picked up by NSThread (et al) and transported around the system. So change the TSD to
-                                        * make this thread look like QOS_CLASS_USER_INITIATED even though it will still run as legacy.
-                                        */
-                                       _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(QOS_CLASS_USER_INITIATED, 0, 0));
-                                       (*func)(WORKQ_DEFAULT_PRIOQUEUE, opts, NULL);
-                                       break;
-
-                               case QOS_CLASS_UTILITY:
-                                       (*func)(WORKQ_LOW_PRIOQUEUE, opts, NULL);
-                                       break;
-
-                               case QOS_CLASS_BACKGROUND:
-                                       (*func)(WORKQ_BG_PRIOQUEUE, opts, NULL);
-                                       break;
-
-                               /* Legacy dispatch does not use QOS_CLASS_MAINTENANCE, so no need to handle it here */
-                       }
-
-               } else {
-                       /* "New" API, where dispatch is expecting to be given the thread priority */
-                       (*__libdispatch_workerfunction)(priority);
+               if (os_unlikely(rc < 0)) {
+                       PTHREAD_INTERNAL_CRASH(self->err_no, "kevent (workloop) failed");
                }
        } else {
-               /* We're the new library running on an old kext, so thread_class is really the workq priority. */
-               pthread_workqueue_function_t func = (pthread_workqueue_function_t)__libdispatch_workerfunction;
-               int options = overcommit ? WORKQ_ADDTHREADS_OPTION_OVERCOMMIT : 0;
-               (*func)(thread_class, options, NULL);
-       }
-
-       __workq_kernreturn(WQOPS_THREAD_RETURN, NULL, 0, 0);
-
-thexit:
-       {
-               pthread_priority_t current_priority = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS);
-               if ((current_priority & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) ||
-                       (_pthread_priority_get_qos_newest(current_priority) > WQ_THREAD_CLEANUP_QOS)) {
-                       // Reset QoS to something low for the cleanup process
-                       priority = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0);
-                       _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority);
-               }
+just_return:
+               __workq_kernreturn(self->wq_retop, NULL, 0, 0);
        }
 
-       _pthread_exit(self, NULL);
+exit:
+       _pthread_wqthread_exit(self);
 }
 
-/***** pthread workqueue API for libdispatch *****/
+
+#pragma mark pthread workqueue API for libdispatch
+
 
 _Static_assert(WORKQ_KEVENT_EVENT_BUFFER_LEN == WQ_KEVENT_LIST_LEN,
                "Kernel and userland should agree on the event list size");
@@ -2329,42 +2365,18 @@ pthread_workqueue_addthreads_np(int queue_priority, int options, int numthreads)
        }
 
        pthread_priority_t kp = 0;
+       int compat_priority = queue_priority & WQ_FLAG_THREAD_PRIO_MASK;
+       int flags = 0;
 
-       if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) {
-               /* The new kernel API takes the new QoS class + relative priority style of
-                * priority. This entry point is here for compatibility with old libdispatch
-                * versions (ie. the simulator). We request the corresponding new bracket
-                * from the kernel, then on the way out run all dispatch queues that were
-                * requested.
-                */
-
-               int compat_priority = queue_priority & WQ_FLAG_THREAD_PRIOMASK;
-               int flags = 0;
-
-               /* To make sure the library does not issue more threads to dispatch than
-                * were requested, the total number of active requests is recorded in
-                * __workq_requests.
-                */
-               if (options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) {
-                       flags = _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
-               }
+       if (options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) {
+               flags = _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
+       }
 
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
-               kp = _pthread_qos_class_encode_workqueue(compat_priority, flags);
+       kp = _pthread_qos_class_encode_workqueue(compat_priority, flags);
 #pragma clang diagnostic pop
 
-       } else {
-               /* Running on the old kernel, queue_priority is what we pass directly to
-                * the syscall.
-                */
-               kp = queue_priority & WQ_FLAG_THREAD_PRIOMASK;
-
-               if (options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) {
-                       kp |= WORKQUEUE_OVERCOMMIT;
-               }
-       }
-
        res = __workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, numthreads, (int)kp);
        if (res == -1) {
                res = errno;
@@ -2391,9 +2403,17 @@ _pthread_workqueue_addthreads(int numthreads, pthread_priority_t priority)
                return EPERM;
        }
 
-       if ((__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) == 0) {
-               return ENOTSUP;
-       }
+#if TARGET_OS_OSX
+       // <rdar://problem/37687655> Legacy simulators fail to boot
+       //
+       // Older sims set the deprecated _PTHREAD_PRIORITY_ROOTQUEUE_FLAG wrongly,
+       // which is aliased to _PTHREAD_PRIORITY_SCHED_PRI_FLAG and that XNU
+       // validates and rejects.
+       //
+       // As a workaround, forcefully unset this bit that cannot be set here
+       // anyway.
+       priority &= ~_PTHREAD_PRIORITY_SCHED_PRI_FLAG;
+#endif
 
        res = __workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, numthreads, (int)priority);
        if (res == -1) {
@@ -2412,9 +2432,62 @@ _pthread_workqueue_set_event_manager_priority(pthread_priority_t priority)
        return res;
 }
 
-/*
- * Introspection SPI for libpthread.
- */
+int
+_pthread_workloop_create(uint64_t workloop_id, uint64_t options, pthread_attr_t *attr)
+{
+       struct kqueue_workloop_params params = {
+               .kqwlp_version = sizeof(struct kqueue_workloop_params),
+               .kqwlp_id = workloop_id,
+               .kqwlp_flags = 0,
+       };
+
+       if (!attr) {
+               return EINVAL;
+       }
+
+       if (attr->schedset) {
+               params.kqwlp_flags |= KQ_WORKLOOP_CREATE_SCHED_PRI;
+               params.kqwlp_sched_pri = attr->param.sched_priority;
+       }
+
+       if (attr->policyset) {
+               params.kqwlp_flags |= KQ_WORKLOOP_CREATE_SCHED_POL;
+               params.kqwlp_sched_pol = attr->policy;
+       }
+
+       if (attr->cpupercentset) {
+               params.kqwlp_flags |= KQ_WORKLOOP_CREATE_CPU_PERCENT;
+               params.kqwlp_cpu_percent = attr->cpupercent;
+               params.kqwlp_cpu_refillms = attr->refillms;
+       }
+
+       int res = __kqueue_workloop_ctl(KQ_WORKLOOP_CREATE, 0, &params,
+                       sizeof(params));
+       if (res == -1) {
+               res = errno;
+       }
+       return res;
+}
+
+int
+_pthread_workloop_destroy(uint64_t workloop_id)
+{
+       struct kqueue_workloop_params params = {
+               .kqwlp_version = sizeof(struct kqueue_workloop_params),
+               .kqwlp_id = workloop_id,
+       };
+
+       int res = __kqueue_workloop_ctl(KQ_WORKLOOP_DESTROY, 0, &params,
+                       sizeof(params));
+       if (res == -1) {
+               res = errno;
+       }
+       return res;
+}
+
+
+#pragma mark Introspection SPI for libpthread
+
 
 static pthread_introspection_hook_t _pthread_introspection_hook;
 
@@ -2428,19 +2501,17 @@ pthread_introspection_hook_install(pthread_introspection_hook_t hook)
 
 PTHREAD_NOINLINE
 static void
-_pthread_introspection_hook_callout_thread_create(pthread_t t, bool destroy)
+_pthread_introspection_hook_callout_thread_create(pthread_t t)
 {
        _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_CREATE, t, t,
                        PTHREAD_SIZE);
-       if (!destroy) return;
-       _pthread_introspection_thread_destroy(t);
 }
 
 static inline void
-_pthread_introspection_thread_create(pthread_t t, bool destroy)
+_pthread_introspection_thread_create(pthread_t t)
 {
        if (os_fastpath(!_pthread_introspection_hook)) return;
-       _pthread_introspection_hook_callout_thread_create(t, destroy);
+       _pthread_introspection_hook_callout_thread_create(t);
 }
 
 PTHREAD_NOINLINE
@@ -2449,8 +2520,9 @@ _pthread_introspection_hook_callout_thread_start(pthread_t t)
 {
        size_t freesize;
        void *freeaddr;
-       if (t == &_thread) {
-               freesize = t->stacksize + t->guardsize;
+       if (t == main_thread()) {
+               size_t stacksize = t->stackaddr - t->stackbottom;
+               freesize = stacksize + t->guardsize;
                freeaddr = t->stackaddr - freesize;
        } else {
                freesize = t->freesize - PTHREAD_SIZE;
@@ -2469,32 +2541,33 @@ _pthread_introspection_thread_start(pthread_t t)
 
 PTHREAD_NOINLINE
 static void
-_pthread_introspection_hook_callout_thread_terminate(pthread_t t,
-               void *freeaddr, size_t freesize, bool destroy)
+_pthread_introspection_hook_callout_thread_terminate(pthread_t t)
 {
-       if (destroy && freesize) {
-               freesize -= PTHREAD_SIZE;
+       size_t freesize;
+       void *freeaddr;
+       if (t == main_thread()) {
+               size_t stacksize = t->stackaddr - t->stackbottom;
+               freesize = stacksize + t->guardsize;
+               freeaddr = t->stackaddr - freesize;
+       } else {
+               freesize = t->freesize - PTHREAD_SIZE;
+               freeaddr = t->freeaddr;
        }
        _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_TERMINATE, t,
                        freeaddr, freesize);
-       if (!destroy) return;
-       _pthread_introspection_thread_destroy(t);
 }
 
 static inline void
-_pthread_introspection_thread_terminate(pthread_t t, void *freeaddr,
-               size_t freesize, bool destroy)
+_pthread_introspection_thread_terminate(pthread_t t)
 {
        if (os_fastpath(!_pthread_introspection_hook)) return;
-       _pthread_introspection_hook_callout_thread_terminate(t, freeaddr, freesize,
-                       destroy);
+       _pthread_introspection_hook_callout_thread_terminate(t);
 }
 
 PTHREAD_NOINLINE
 static void
 _pthread_introspection_hook_callout_thread_destroy(pthread_t t)
 {
-       if (t == &_thread) return;
        _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_DESTROY, t, t,
                        PTHREAD_SIZE);
 }
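
With this change the hooks no longer special-case the main thread inline:
TERMINATE reports the freeable stack region for every thread, and DESTROY now
fires for the main thread too. A sketch of a chained hook, using the SPI from
pthread/introspection.h (callers are expected to forward to the previously
installed hook):

    #include <pthread/introspection.h>

    static pthread_introspection_hook_t prev_hook;

    static void
    my_hook(unsigned int event, pthread_t thread, void *addr, size_t size)
    {
            if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) {
                    // addr/size describe the whole freeable stack region,
                    // main thread included, per the change above
            }
            if (prev_hook) prev_hook(event, thread, addr, size);
    }

    __attribute__((constructor))
    static void
    install(void)
    {
            prev_hook = pthread_introspection_hook_install(my_hook);
    }
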
@@ -2506,3 +2579,37 @@ _pthread_introspection_thread_destroy(pthread_t t)
        _pthread_introspection_hook_callout_thread_destroy(t);
 }
 
+#pragma mark libplatform shims
+
+#include <platform/string.h>
+
+// pthread_setup initializes large structures to 0,
+// which the compiler turns into a library call to memset.
+//
+// To avoid linking against Libc, provide simple wrappers
+// that call through to the libplatform primitives.
+
+#undef memset
+PTHREAD_NOEXPORT
+void *
+memset(void *b, int c, size_t len)
+{
+       return _platform_memset(b, c, len);
+}
+
+#undef bzero
+PTHREAD_NOEXPORT
+void
+bzero(void *s, size_t n)
+{
+       _platform_bzero(s, n);
+}
+
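+// Note: memcpy below deliberately forwards to _platform_memmove; memmove's
+// overlap-safe semantics are a strict superset of memcpy's, so the alias is
+// always sound.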
+#undef memcpy
+PTHREAD_NOEXPORT
+void *
+memcpy(void* a, const void* b, unsigned long s)
+{
+       return _platform_memmove(a, b, s);
+}
+
index 8fc11c7187690b6f937f8edf8f1214e8f0e87478..90afe461bfec6a8168ae7a1c616940160d91f9d8 100644 (file)
@@ -21,6 +21,8 @@
  * @APPLE_LICENSE_HEADER_END@
  */
 
+#include "offsets.h"
+
 #if defined(__x86_64__)
 
 #include <mach/i386/syscall_sw.h>
@@ -49,6 +51,51 @@ _thread_start:
        leave
        ret
 
+       .align 2, 0x90
+       .globl _thread_chkstk_darwin
+_thread_chkstk_darwin:
+       .globl ____chkstk_darwin
+____chkstk_darwin: // %rax == alloca size
+       pushq  %rcx
+       leaq   0x10(%rsp), %rcx
+
+       // validate that the frame pointer is on our stack (no alt stack)
+       cmpq   %rcx, %gs:_PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET
+       jb     Lprobe
+       cmpq   %rcx, %gs:_PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET
+       jae    Lprobe
+
+       // validate alloca size
+       subq   %rax, %rcx
+       jb     Lcrash
+       cmpq   %rcx, %gs:_PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET
+       ja     Lcrash
+
+       popq   %rcx
+       retq
+
+Lprobe:
+       // probe the stack when it's not ours (altstack or some shenanigan)
+       cmpq   $0x1000, %rax
+       jb     Lend
+       pushq  %rax
+Lloop:
+       subq   $0x1000, %rcx
+       testq  %rcx, (%rcx)
+       subq   $0x1000, %rax
+       cmpq   $0x1000, %rax
+       ja     Lloop
+       popq   %rax
+Lend:
+       subq   %rax, %rcx
+       testq  %rcx, (%rcx)
+
+       popq   %rcx
+       retq
+
+Lcrash:
+       ud2
+
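
A rough C rendition of the Lprobe/Lloop/Lend path above, assuming the 0x1000
(4 KiB) probe granularity used by the assembly; this is a sketch of the
technique, not a drop-in replacement for the register-level code:

    #include <stddef.h>

    // Touch one byte per 0x1000 bytes while walking down from the stack
    // pointer, so each guard page is faulted in order and the allocation
    // never jumps past an unmapped guard region.
    static void
    chkstk_probe(volatile char *sp, size_t alloca_size)
    {
            while (alloca_size >= 0x1000) {
                    sp -= 0x1000;
                    (void)*sp;              // probe this page
                    alloca_size -= 0x1000;
            }
            sp -= alloca_size;
            (void)*sp;                      // final probe at the new stack top
    }
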
 #endif
 
 #elif defined(__i386__)
@@ -91,6 +138,56 @@ _thread_start:
        leave
        ret
 
+       .align 2, 0x90
+       .globl _thread_chkstk_darwin
+_thread_chkstk_darwin:
+       .globl ____chkstk_darwin
+____chkstk_darwin: // %eax == alloca size
+       pushl  %ecx
+       pushl  %edx
+       leal   0xc(%esp), %ecx
+
+       // validate that the frame pointer is on our stack (no alt stack)
+       movl   %gs:0x0, %edx    // pthread_self()
+       cmpl   %ecx, _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET(%edx)
+       jb     Lprobe
+       movl   _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET(%edx), %edx
+       cmpl   %ecx, %edx
+       jae    Lprobe
+
+       // validate alloca size
+       subl   %eax, %ecx
+       jb     Lcrash
+       cmpl   %ecx, %edx
+       ja     Lcrash
+
+       popl   %edx
+       popl   %ecx
+       retl
+
+Lprobe:
+       // probe the stack when it's not ours (altstack or some shenanigan)
+       cmpl   $0x1000, %eax
+       jb     Lend
+       pushl  %eax
+Lloop:
+       subl   $0x1000, %ecx
+       testl  %ecx, (%ecx)
+       subl   $0x1000, %eax
+       cmpl   $0x1000, %eax
+       ja     Lloop
+       popl   %eax
+Lend:
+       subl   %eax, %ecx
+       testl  %ecx, (%ecx)
+
+       popl   %edx
+       popl   %ecx
+       retl
+
+Lcrash:
+       ud2
+
 #endif
 
 #elif defined(__arm__)
index 894178c810e0ef8ce6336efc16165fe3d94cf5bd..8bb9c08a7c3e1aa1a418c1c649a85cb046a02e96 100644 (file)
 #include <sys/resource.h>
 #include <sys/sysctl.h>
 #include <sys/queue.h>
+#include <sys/ulock.h>
 #include <machine/vmparam.h>
 #include <mach/vm_statistics.h>
 
-extern int __unix_conforming;
 extern int _pthread_cond_wait(pthread_cond_t *cond,
                        pthread_mutex_t *mutex,
                        const struct timespec *abstime,
@@ -73,16 +73,27 @@ extern int __sigwait(const sigset_t *set, int *sig);
 extern int __pthread_sigmask(int, const sigset_t *, sigset_t *);
 extern int __pthread_markcancel(mach_port_t);
 extern int __pthread_canceled(int);
+extern int __semwait_signal_nocancel(int, int, int, int, __int64_t, __int32_t);
 
-#ifdef VARIANT_CANCELABLE
-extern int __semwait_signal(int cond_sem, int mutex_sem, int timeout, int relative, __int64_t tv_sec, __int32_t tv_nsec);
-#else
-extern int __semwait_signal(int cond_sem, int mutex_sem, int timeout, int relative, __int64_t tv_sec, __int32_t tv_nsec)  __asm__("___semwait_signal_nocancel");
-#endif
 
 PTHREAD_NOEXPORT
-int _pthread_join(pthread_t thread, void **value_ptr, int conforming,
-               int (*_semwait_signal)(int, int, int, int, __int64_t, __int32_t));
+int _pthread_join(pthread_t thread, void **value_ptr, int conforming);
+
+static inline int
+_pthread_conformance(void)
+{
+#if __DARWIN_UNIX03
+       if (__unix_conforming == 0)
+               __unix_conforming = 1;
+#ifdef VARIANT_CANCELABLE
+       return PTHREAD_CONFORM_UNIX03_CANCELABLE;
+#else /* !VARIANT_CANCELABLE */
+       return PTHREAD_CONFORM_UNIX03_NOCANCEL;
+#endif
+#else /* __DARWIN_UNIX03 */
+       return PTHREAD_CONFORM_DARWIN_LEGACY;
+#endif /* __DARWIN_UNIX03 */
+}
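
Each cancellation-sensitive translation unit is built once per variant, so the
conformance value is effectively fixed at compile time. The assumed mapping
(symbol decorations vary by architecture and are illustrative only):

    // __DARWIN_UNIX03 && VARIANT_CANCELABLE   -> PTHREAD_CONFORM_UNIX03_CANCELABLE
    //                                            (the default cancelable symbols)
    // __DARWIN_UNIX03 && !VARIANT_CANCELABLE  -> PTHREAD_CONFORM_UNIX03_NOCANCEL
    //                                            (the $NOCANCEL variants)
    // !__DARWIN_UNIX03                         -> PTHREAD_CONFORM_DARWIN_LEGACY
    //                                            (pre-UNIX03 binaries; note that
    //                                            pthread_exit() then exits with
    //                                            NULL rather than PTHREAD_CANCELED)
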
 
 #ifndef VARIANT_CANCELABLE
 
@@ -111,7 +122,7 @@ pthread_cancel(pthread_t thread)
                __unix_conforming = 1;
 #endif /* __DARWIN_UNIX03 */
 
-       if (!_pthread_is_valid(thread, 0, NULL)) {
+       if (!_pthread_is_valid(thread, NULL)) {
                return(ESRCH);
        }
 
@@ -135,15 +146,7 @@ pthread_cancel(pthread_t thread)
 void
 pthread_testcancel(void)
 {
-       pthread_t self = pthread_self();
-
-#if __DARWIN_UNIX03
-       if (__unix_conforming == 0)
-               __unix_conforming = 1;
-       _pthread_testcancel(self, 1);
-#else /* __DARWIN_UNIX03 */
-       _pthread_testcancel(self, 0);
-#endif /* __DARWIN_UNIX03 */
+       _pthread_testcancel(_pthread_conformance());
 }
 
 #ifndef BUILDING_VARIANT /* [ */
@@ -154,23 +157,32 @@ _pthread_exit_if_canceled(int error)
 {
        if (((error & 0xff) == EINTR) && __unix_conforming && (__pthread_canceled(0) == 0)) {
                pthread_t self = pthread_self();
-               if (self != NULL) {
-                       self->cancel_error = error;
-               }
+
+               self->cancel_error = error;
+               self->canceled = true;
                pthread_exit(PTHREAD_CANCELED);
        }
 }
 
 
-PTHREAD_NOEXPORT_VARIANT
-void
-_pthread_testcancel(pthread_t thread, int isconforming)
+static inline bool
+_pthread_is_canceled(pthread_t thread)
 {
        const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING);
-
        int state = os_atomic_load2o(thread, cancel_state, seq_cst);
-       if ((state & flags) == flags) {
-               pthread_exit(isconforming ? PTHREAD_CANCELED : 0);
+       return (state & flags) == flags;
+}
+
+PTHREAD_NOEXPORT_VARIANT
+void
+_pthread_testcancel(int isconforming)
+{
+       pthread_t self = pthread_self();
+       if (_pthread_is_canceled(self)) {
+               // 4597450: begin
+               self->canceled = (isconforming != PTHREAD_CONFORM_DARWIN_LEGACY);
+               // 4597450: end
+               pthread_exit(isconforming ? PTHREAD_CANCELED : NULL);
        }
 }
 
@@ -179,7 +191,6 @@ void
 _pthread_markcancel_if_canceled(pthread_t thread, mach_port_t kport)
 {
        const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING);
-
        int state = os_atomic_or2o(thread, cancel_state,
                        _PTHREAD_CANCEL_INITIALIZED, relaxed);
        if ((state & flags) == flags && __unix_conforming) {
@@ -187,35 +198,14 @@ _pthread_markcancel_if_canceled(pthread_t thread, mach_port_t kport)
        }
 }
 
-PTHREAD_NOEXPORT
-void *
-_pthread_get_exit_value(pthread_t thread, int conforming)
-{
-       const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING);
-       void *value = thread->exit_value;
-
-       if (conforming) {
-               int state = os_atomic_load2o(thread, cancel_state, seq_cst);
-               if ((state & flags) == flags) {
-                       value = PTHREAD_CANCELED;
-               }
-       }
-       return value;
-}
-
 /* When a thread exits set the cancellation state to DISABLE and DEFERRED */
 PTHREAD_NOEXPORT
 void
-_pthread_setcancelstate_exit(pthread_t thread, void *value_ptr, int conforming)
+_pthread_setcancelstate_exit(pthread_t thread, void *value_ptr)
 {
        _pthread_update_cancel_state(thread,
                        _PTHREAD_CANCEL_STATE_MASK | _PTHREAD_CANCEL_TYPE_MASK,
                        PTHREAD_CANCEL_DISABLE | PTHREAD_CANCEL_DEFERRED);
-       if (value_ptr == PTHREAD_CANCELED) {
-               _PTHREAD_LOCK(thread->lock);
-               thread->detached |= _PTHREAD_WASCANCEL; // 4597450
-               _PTHREAD_UNLOCK(thread->lock);
-       }
 }
 
 #endif /* !BUILDING_VARIANT ] */
@@ -227,30 +217,30 @@ PTHREAD_ALWAYS_INLINE
 static inline int
 _pthread_setcancelstate_internal(int state, int *oldstateptr, int conforming)
 {
-       pthread_t self;
+       pthread_t self = pthread_self();
 
        switch (state) {
-               case PTHREAD_CANCEL_ENABLE:
-                       if (conforming) {
-                               __pthread_canceled(1);
-                       }
-                       break;
-               case PTHREAD_CANCEL_DISABLE:
-                       if (conforming) {
-                               __pthread_canceled(2);
-                       }
-                       break;
-               default:
-                       return EINVAL;
+       case PTHREAD_CANCEL_ENABLE:
+               if (conforming) {
+                       __pthread_canceled(1);
+               }
+               break;
+       case PTHREAD_CANCEL_DISABLE:
+               if (conforming) {
+                       __pthread_canceled(2);
+               }
+               break;
+       default:
+               return EINVAL;
        }
 
-       self = pthread_self();
        int oldstate = _pthread_update_cancel_state(self, _PTHREAD_CANCEL_STATE_MASK, state);
        if (oldstateptr) {
                *oldstateptr = oldstate & _PTHREAD_CANCEL_STATE_MASK;
        }
        if (!conforming) {
-               _pthread_testcancel(self, 0);  /* See if we need to 'die' now... */
+               /* See if we need to 'die' now... */
+               _pthread_testcancel(PTHREAD_CONFORM_DARWIN_LEGACY);
        }
        return 0;
 }
@@ -292,7 +282,8 @@ pthread_setcanceltype(int type, int *oldtype)
                *oldtype = oldstate & _PTHREAD_CANCEL_TYPE_MASK;
        }
 #if !__DARWIN_UNIX03
-       _pthread_testcancel(self, 0);  /* See if we need to 'die' now... */
+       /* See if we need to 'die' now... */
+       _pthread_testcancel(PTHREAD_CONFORM_DARWIN_LEGACY);
 #endif /* __DARWIN_UNIX03 */
        return (0);
 }
@@ -315,76 +306,196 @@ pthread_sigmask(int how, const sigset_t * set, sigset_t * oset)
 
 #ifndef BUILDING_VARIANT /* [ */
 
-static void
-__posix_join_cleanup(void *arg)
+typedef struct pthread_join_context_s {
+       pthread_t   waiter;
+       void      **value_ptr;
+       mach_port_t kport;
+       semaphore_t custom_stack_sema;
+       bool        detached;
+} pthread_join_context_s, *pthread_join_context_t;
+
+static inline void *
+_pthread_get_exit_value(pthread_t thread)
 {
-       pthread_t thread = (pthread_t)arg;
+       if (__unix_conforming && _pthread_is_canceled(thread)) {
+               return PTHREAD_CANCELED;
+       }
+       return thread->tl_exit_value;
+}
 
-       _PTHREAD_LOCK(thread->lock);
-       /* leave another thread to join */
-       thread->joiner = (struct _pthread *)NULL;
-       _PTHREAD_UNLOCK(thread->lock);
+// called with _pthread_list_lock held
+PTHREAD_NOEXPORT
+semaphore_t
+_pthread_joiner_prepost_wake(pthread_t thread)
+{
+       pthread_join_context_t ctx = thread->tl_join_ctx;
+       semaphore_t sema = MACH_PORT_NULL;
+
+       if (thread->tl_joinable) {
+               sema = ctx->custom_stack_sema;
+               thread->tl_joinable = false;
+       } else {
+               ctx->detached = true;
+               thread->tl_join_ctx = NULL;
+       }
+       if (ctx->value_ptr) *ctx->value_ptr = _pthread_get_exit_value(thread);
+       return sema;
+}
+
+static inline bool
+_pthread_joiner_abort_wait(pthread_t thread, pthread_join_context_t ctx)
+{
+       bool aborted = false;
+
+       _PTHREAD_LOCK(_pthread_list_lock);
+       if (!ctx->detached && thread->tl_exit_gate != MACH_PORT_DEAD) {
+               /*
+                * _pthread_joiner_prepost_wake() didn't happen;
+                * allow another thread to join.
+                */
+#if DEBUG
+               PTHREAD_ASSERT(thread->tl_join_ctx == ctx);
+#endif
+               thread->tl_join_ctx = NULL;
+               thread->tl_exit_gate = MACH_PORT_NULL;
+               aborted = true;
+       }
+       _PTHREAD_UNLOCK(_pthread_list_lock);
+       return aborted;
+}
+
+static int
+_pthread_joiner_wait(pthread_t thread, pthread_join_context_t ctx, int conforming)
+{
+       uint32_t *exit_gate = &thread->tl_exit_gate;
+       int ulock_op = UL_UNFAIR_LOCK | ULF_NO_ERRNO;
+
+       if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE) {
+               ulock_op |= ULF_WAIT_CANCEL_POINT;
+       }
+
+       for (;;) {
+               uint32_t cur = os_atomic_load(exit_gate, acquire);
+               if (cur == MACH_PORT_DEAD) {
+                       break;
+               }
+               if (os_unlikely(cur != ctx->kport)) {
+                       PTHREAD_CLIENT_CRASH(cur, "pthread_join() state corruption");
+               }
+               int ret = __ulock_wait(ulock_op, exit_gate, ctx->kport, 0);
+               switch (-ret) {
+               case 0:
+               case EFAULT:
+                       break;
+               case EINTR:
+                       /*
+                        * POSIX says:
+                        *
+                        *   As specified, either the pthread_join() call is canceled, or it
+                        *   succeeds, but not both. The difference is obvious to the
+                        *   application, since either a cancellation handler is run or
+                        *   pthread_join() returns.
+                        *
+                        * When __ulock_wait() returns EINTR, we check if we have been
+                        * canceled, and if we have, we try to abort the wait.
+                        *
+                        * If we can't, it means the other thread finished the join while we
+                        * were being canceled and committed the waiter to return from
+                        * pthread_join(). Returning from the join then takes precedence
+                        * over the cancelation which will be acted upon at the next
+                        * cancelation point.
+                        */
+                       if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE &&
+                                       _pthread_is_canceled(ctx->waiter)) {
+                               if (_pthread_joiner_abort_wait(thread, ctx)) {
+                                       ctx->waiter->canceled = true;
+                                       pthread_exit(PTHREAD_CANCELED);
+                               }
+                       }
+                       break;
+               }
+       }
+
+       bool cleanup = false;
+
+       _PTHREAD_LOCK(_pthread_list_lock);
+       // If pthread_detach() was called, we can't safely dereference the thread;
+       // otherwise, decide who gets to deallocate it (see _pthread_terminate).
+       if (!ctx->detached) {
+#if DEBUG
+               PTHREAD_ASSERT(thread->tl_join_ctx == ctx);
+#endif
+               thread->tl_join_ctx = NULL;
+               cleanup = thread->tl_joiner_cleans_up;
+       }
+       _PTHREAD_UNLOCK(_pthread_list_lock);
+
+       if (cleanup) {
+               _pthread_deallocate(thread, false);
+       }
+       return 0;
 }
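
The tl_exit_gate handshake reduces to a one-shot gate on a 32-bit word. A
minimal sketch of that pattern, with the private ulock interface declared by
hand (constants copied from the non-public <sys/ulock.h>):

    #include <mach/port.h>          // MACH_PORT_DEAD
    #include <stdatomic.h>
    #include <stdint.h>

    extern int __ulock_wait(uint32_t op, void *addr, uint64_t value,
                    uint32_t timeout);
    extern int __ulock_wake(uint32_t op, void *addr, uint64_t wake_value);

    #define UL_UNFAIR_LOCK  2
    #define ULF_NO_ERRNO    0x01000000

    // joiner side: block while the gate still holds the target's kport
    static void
    gate_wait(_Atomic uint32_t *gate, uint32_t kport)
    {
            while (atomic_load_explicit(gate, memory_order_acquire)
                            != (uint32_t)MACH_PORT_DEAD) {
                    __ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO, gate, kport, 0);
            }
    }

    // exiting side: publish MACH_PORT_DEAD, then wake any joiner
    static void
    gate_open(_Atomic uint32_t *gate)
    {
            atomic_store_explicit(gate, (uint32_t)MACH_PORT_DEAD,
                            memory_order_release);
            __ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, gate, 0);
    }
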
 
 PTHREAD_NOEXPORT PTHREAD_NOINLINE
 int
-_pthread_join(pthread_t thread, void **value_ptr, int conforming,
-               int (*_semwait_signal)(int, int, int, int, __int64_t, __int32_t))
+_pthread_join(pthread_t thread, void **value_ptr, int conforming)
 {
-       int res = 0;
        pthread_t self = pthread_self();
-       kern_return_t kern_res;
-       semaphore_t joinsem, death = (semaphore_t)os_get_cached_semaphore();
+       pthread_join_context_s ctx = {
+               .waiter = self,
+               .value_ptr = value_ptr,
+               .kport = MACH_PORT_NULL,
+               .custom_stack_sema = MACH_PORT_NULL,
+       };
+       int res = 0;
+       kern_return_t kr;
 
-       if (!_pthread_is_valid(thread, PTHREAD_IS_VALID_LOCK_THREAD, NULL)) {
-               res = ESRCH;
-               goto out;
+       if (!_pthread_validate_thread_and_list_lock(thread)) {
+               return ESRCH;
        }
 
-       if (thread->sig != _PTHREAD_SIG) {
-               res = ESRCH;
-       } else if ((thread->detached & PTHREAD_CREATE_DETACHED) ||
-                       !(thread->detached & PTHREAD_CREATE_JOINABLE) ||
-                       (thread->joiner != NULL)) {
+       if (!thread->tl_joinable || (thread->tl_join_ctx != NULL)) {
                res = EINVAL;
-       } else if (thread == self || (self != NULL && self->joiner == thread)) {
+       } else if (thread == self ||
+                       (self->tl_join_ctx && self->tl_join_ctx->waiter == thread)) {
                res = EDEADLK;
+       } else if (thread->tl_exit_gate == MACH_PORT_DEAD) {
+               TAILQ_REMOVE(&__pthread_head, thread, tl_plist);
+#if DEBUG
+               PTHREAD_ASSERT(thread->tl_joiner_cleans_up);
+#endif
+               thread->tl_joinable = false;
+               if (value_ptr) *value_ptr = _pthread_get_exit_value(thread);
+       } else {
+               ctx.kport = _pthread_kernel_thread(thread);
+               thread->tl_exit_gate = ctx.kport;
+               thread->tl_join_ctx = &ctx;
+               if (thread->tl_has_custom_stack) {
+                       ctx.custom_stack_sema = (semaphore_t)os_get_cached_semaphore();
+               }
        }
-       if (res != 0) {
-               _PTHREAD_UNLOCK(thread->lock);
-               goto out;
-       }
+       _PTHREAD_UNLOCK(_pthread_list_lock);
 
-       joinsem = thread->joiner_notify;
-       if (joinsem == SEMAPHORE_NULL) {
-               thread->joiner_notify = joinsem = death;
-               death = MACH_PORT_NULL;
+       if (res == 0) {
+               if (ctx.kport == MACH_PORT_NULL) {
+                       _pthread_deallocate(thread, false);
+               } else {
+                       res = _pthread_joiner_wait(thread, &ctx, conforming);
+               }
        }
-       thread->joiner = self;
-       _PTHREAD_UNLOCK(thread->lock);
-
-       if (conforming) {
-               /* Wait for it to signal... */
-               pthread_cleanup_push(__posix_join_cleanup, (void *)thread);
-               do {
-                       res = _semwait_signal(joinsem, 0, 0, 0, 0, 0);
-               } while ((res < 0) && (errno == EINTR));
-               pthread_cleanup_pop(0);
-       } else {
-               /* Wait for it to signal... */
-               kern_return_t (*_semaphore_wait)(semaphore_t) =
-                               (void*)_semwait_signal;
+       if (res == 0 && ctx.custom_stack_sema && !ctx.detached) {
+               // Threads with a custom stack need to make sure _pthread_terminate
+               // has returned before the joiner is unblocked; otherwise the joiner
+               // may quickly deallocate the stack, with rather dire consequences.
+               //
+               // When we reach this point we know the pthread_join has to succeed,
+               // so this can't be a cancelation point.
                do {
-                       kern_res = _semaphore_wait(joinsem);
-               } while (kern_res != KERN_SUCCESS);
+                       kr = __semwait_signal_nocancel(ctx.custom_stack_sema, 0, 0, 0, 0, 0);
+               } while (kr != KERN_SUCCESS);
        }
-
-       os_put_cached_semaphore((os_semaphore_t)joinsem);
-       res = _pthread_join_cleanup(thread, value_ptr, conforming);
-
-out:
-       if (death) {
-               os_put_cached_semaphore(death);
+       if (ctx.custom_stack_sema) {
+               os_put_cached_semaphore(ctx.custom_stack_sema);
        }
        return res;
 }
@@ -398,82 +509,45 @@ out:
 int
 pthread_join(pthread_t thread, void **value_ptr)
 {
-#if __DARWIN_UNIX03
-       if (__unix_conforming == 0)
-               __unix_conforming = 1;
-
-#ifdef VARIANT_CANCELABLE
-       _pthread_testcancel(pthread_self(), 1);
-#endif /* VARIANT_CANCELABLE */
-       return _pthread_join(thread, value_ptr, 1, __semwait_signal);
-#else
-       return _pthread_join(thread, value_ptr, 0, (void*)semaphore_wait);
-#endif /* __DARWIN_UNIX03 */
-
+       int conforming = _pthread_conformance();
+       if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE) {
+               _pthread_testcancel(conforming);
+       }
+       return _pthread_join(thread, value_ptr, conforming);
 }
 
 int
-pthread_cond_wait(pthread_cond_t *cond,
-                 pthread_mutex_t *mutex)
+pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
 {
-       int conforming;
-#if __DARWIN_UNIX03
-
-       if (__unix_conforming == 0)
-               __unix_conforming = 1;
-
-#ifdef VARIANT_CANCELABLE
-       conforming = 1;
-#else /* !VARIANT_CANCELABLE */
-       conforming = -1;
-#endif /* VARIANT_CANCELABLE */
-#else /* __DARWIN_UNIX03 */
-       conforming = 0;
-#endif /* __DARWIN_UNIX03 */
-       return (_pthread_cond_wait(cond, mutex, (struct timespec *)NULL, 0, conforming));
+       return _pthread_cond_wait(cond, mutex, NULL, 0, _pthread_conformance());
 }
 
 int
-pthread_cond_timedwait(pthread_cond_t *cond,
-                      pthread_mutex_t *mutex,
-                      const struct timespec *abstime)
+pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
+               const struct timespec *abstime)
 {
-       int conforming;
-#if __DARWIN_UNIX03
-       if (__unix_conforming == 0)
-               __unix_conforming = 1;
-
-#ifdef VARIANT_CANCELABLE
-       conforming = 1;
-#else /* !VARIANT_CANCELABLE */
-       conforming = -1;
-#endif /* VARIANT_CANCELABLE */
-#else /* __DARWIN_UNIX03 */
-        conforming = 0;
-#endif /* __DARWIN_UNIX03 */
-
-       return (_pthread_cond_wait(cond, mutex, abstime, 0, conforming));
+       return _pthread_cond_wait(cond, mutex, abstime, 0, _pthread_conformance());
 }
 
 int
 sigwait(const sigset_t * set, int * sig)
 {
 #if __DARWIN_UNIX03
-       int err = 0;
+       int err = 0, conformance = _pthread_conformance();
 
        if (__unix_conforming == 0)
                __unix_conforming = 1;
 
-#ifdef VARIANT_CANCELABLE
-       _pthread_testcancel(pthread_self(), 1);
-#endif /* VARIANT_CANCELABLE */
+       if (conformance == PTHREAD_CONFORM_UNIX03_CANCELABLE) {
+               _pthread_testcancel(conformance);
+       }
 
        if (__sigwait(set, sig) == -1) {
                err = errno;
 
-#ifdef VARIANT_CANCELABLE
-               _pthread_testcancel(pthread_self(), 1);
-#endif /* VARIANT_CANCELABLE */
+               if (conformance == PTHREAD_CONFORM_UNIX03_CANCELABLE) {
+                       _pthread_testcancel(conformance);
+               }
 
                /*
                 * EINTR that isn't a result of pthread_cancel()
index be55e1d164942c7ffad560259747fb93e206c999..79e38baa07b6cdc481c3bc6762466fa61a58a603 100644 (file)
@@ -59,7 +59,6 @@
 #endif /* PLOCKSTAT */
 
 extern int __gettimeofday(struct timeval *, struct timezone *);
-extern void _pthread_testcancel(pthread_t thread, int isconforming);
 
 PTHREAD_NOEXPORT
 int _pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex,
@@ -88,8 +87,8 @@ COND_GETSEQ_ADDR(_pthread_cond *cond,
 #ifndef BUILDING_VARIANT /* [ */
 
 static void _pthread_cond_cleanup(void *arg);
-static void _pthread_cond_updateval(_pthread_cond * cond, int error,
-               uint32_t updateval);
+static void _pthread_cond_updateval(_pthread_cond *cond, _pthread_mutex *mutex,
+               int error, uint32_t updateval);
 
 
 int
@@ -401,7 +400,7 @@ _pthread_cond_signal(pthread_cond_t *ocond, bool broadcast, mach_port_t thread)
        }
 
        if (updateval != (uint32_t)-1 && updateval != 0) {
-               _pthread_cond_updateval(cond, 0, updateval);
+               _pthread_cond_updateval(cond, NULL, 0, updateval);
        }
 
        return 0;
@@ -449,8 +448,8 @@ pthread_cond_signal(pthread_cond_t *ocond)
  * Suspend waiting for a condition variable.
  * Note: we have to keep a list of condition variables which are using
  * this same mutex variable so we can detect invalid 'destroy' sequences.
- * If isconforming < 0, we skip the _pthread_testcancel(), but keep the
- * remaining conforming behavior..
+ * If conformance is not cancelable, we skip the _pthread_testcancel(),
+ * but keep the remaining conforming behavior.
  */
 PTHREAD_NOEXPORT PTHREAD_NOINLINE
 int
@@ -458,7 +457,7 @@ _pthread_cond_wait(pthread_cond_t *ocond,
                        pthread_mutex_t *omutex,
                        const struct timespec *abstime,
                        int isRelative,
-                       int isconforming)
+                       int conforming)
 {
        int res;
        _pthread_cond *cond = (_pthread_cond *)ocond;
@@ -477,13 +476,13 @@ _pthread_cond_wait(pthread_cond_t *ocond,
                return res;
        }
 
-       if (isconforming) {
+       if (conforming) {
                if (!_pthread_mutex_check_signature(mutex) &&
                                !_pthread_mutex_check_signature_init(mutex)) {
                        return EINVAL;
                }
-               if (isconforming > 0) {
-                       _pthread_testcancel(pthread_self(), 1);
+               if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE) {
+                       _pthread_testcancel(conforming);
                }
        }
 
@@ -505,7 +504,7 @@ _pthread_cond_wait(pthread_cond_t *ocond,
                        if (then.tv_sec < 0 || (then.tv_sec == 0 && then.tv_nsec == 0)) {
                                return ETIMEDOUT;
                        }
-                       if (isconforming &&
+                       if (conforming &&
                            (abstime->tv_sec < 0 ||
                             abstime->tv_nsec < 0 ||
                             abstime->tv_nsec >= NSEC_PER_SEC)) {
@@ -518,7 +517,7 @@ _pthread_cond_wait(pthread_cond_t *ocond,
                                return ETIMEDOUT;
                        }
                }
-               if (isconforming && (then.tv_sec < 0 || then.tv_nsec < 0)) {
+               if (conforming && (then.tv_sec < 0 || then.tv_nsec < 0)) {
                        return EINVAL;
                }
                if (then.tv_nsec >= NSEC_PER_SEC) {
@@ -567,10 +566,10 @@ _pthread_cond_wait(pthread_cond_t *ocond,
        cvlsgen = ((uint64_t)(ulval | savebits)<< 32) | nlval;
 
        // SUSv3 requires pthread_cond_wait to be a cancellation point
-       if (isconforming) {
+       if (conforming) {
                pthread_cleanup_push(_pthread_cond_cleanup, (void *)cond);
                updateval = __psynch_cvwait(ocond, cvlsgen, ucntval, (pthread_mutex_t *)npmtx, mugen, flags, (int64_t)then.tv_sec, (int32_t)then.tv_nsec);
-               _pthread_testcancel(pthread_self(), isconforming);
+               _pthread_testcancel(conforming);
                pthread_cleanup_pop(0);
        } else {
                updateval = __psynch_cvwait(ocond, cvlsgen, ucntval, (pthread_mutex_t *)npmtx, mugen, flags, (int64_t)then.tv_sec, (int32_t)then.tv_nsec);
@@ -592,12 +591,12 @@ _pthread_cond_wait(pthread_cond_t *ocond,
                }
 
                // add unlock ref to show one less waiter
-               _pthread_cond_updateval(cond, err, 0);
+               _pthread_cond_updateval(cond, mutex, err, 0);
        } else if (updateval != 0) {
                // Successful wait. The return is due to a prepost and may
                // carry bit states; update S and handle the prepost if needed.
-               _pthread_cond_updateval(cond, 0, updateval);
+               _pthread_cond_updateval(cond, mutex, 0, updateval);
        }
 
        pthread_mutex_lock(omutex);
@@ -609,25 +608,20 @@ static void
 _pthread_cond_cleanup(void *arg)
 {
        _pthread_cond *cond = (_pthread_cond *)arg;
+       pthread_t thread = pthread_self();
        pthread_mutex_t *mutex;
 
 // 4597450: begin
-       pthread_t thread = pthread_self();
-       int thcanceled = 0;
-
-       _PTHREAD_LOCK(thread->lock);
-       thcanceled = (thread->detached & _PTHREAD_WASCANCEL);
-       _PTHREAD_UNLOCK(thread->lock);
-
-       if (thcanceled == 0) {
+       if (!thread->canceled) {
                return;
        }
-
 // 4597450: end
+
        mutex = (pthread_mutex_t *)cond->busy;
 
        // add unlock ref to show one less waiter
-       _pthread_cond_updateval(cond, thread->cancel_error, 0);
+       _pthread_cond_updateval(cond, (_pthread_mutex *)mutex,
+                       thread->cancel_error, 0);
 
        /*
        ** Can't do anything if this fails -- we're on the way out
@@ -637,11 +631,9 @@ _pthread_cond_cleanup(void *arg)
        }
 }
 
-#define ECVCERORR       256
-#define ECVPERORR       512
-
 static void
-_pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval)
+_pthread_cond_updateval(_pthread_cond *cond, _pthread_mutex *mutex,
+               int error, uint32_t updateval)
 {
        int needclearpre;
 
@@ -653,10 +645,10 @@ _pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval)
 
        if (error != 0) {
                updateval = PTHRW_INC;
-               if ((error & ECVCERORR) != 0) {
+               if (error & ECVCLEARED) {
                        updateval |= PTH_RWS_CV_CBIT;
                }
-               if ((error & ECVPERORR) != 0) {
+               if (error & ECVPREPOST) {
                        updateval |= PTH_RWS_CV_PBIT;
                }
        }
@@ -675,7 +667,10 @@ _pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval)
                oldval64 = (((uint64_t)scntval) << 32);
                oldval64 |= lcntval;
 
-               if (diffgen <= 0) {
+               PTHREAD_TRACE(psynch_cvar_updateval | DBG_FUNC_START, cond, oldval64,
+                               updateval, 0);
+
+               if (diffgen <= 0 && !is_rws_pbit_set(updateval)) {
                        /* TBD: Assert, should not be the case */
                        /* validate it is spurious and return */
                        newval64 = oldval64;
@@ -700,19 +695,22 @@ _pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval)
                }
        } while (!os_atomic_cmpxchg(c_lsseqaddr, oldval64, newval64, seq_cst));
 
+       PTHREAD_TRACE(psynch_cvar_updateval | DBG_FUNC_END, cond, newval64,
+                       (uint64_t)diffgen << 32 | needclearpre, 0);
+
        if (diffgen > 0) {
                // if L == S, then reset associated mutex
                if ((nsval & PTHRW_COUNT_MASK) == (lcntval & PTHRW_COUNT_MASK)) {
                        cond->busy = NULL;
                }
+       }
 
-               if (needclearpre != 0) {
-                       uint32_t flags = 0;
-                       if (cond->pshared == PTHREAD_PROCESS_SHARED) {
-                               flags |= _PTHREAD_MTX_OPT_PSHARED;
-                       }
-                       (void)__psynch_cvclrprepost(cond, lcntval, ucntval, nsval, 0, lcntval, flags);
+       if (needclearpre) {
+               uint32_t flags = 0;
+               if (cond->pshared == PTHREAD_PROCESS_SHARED) {
+                       flags |= _PTHREAD_MTX_OPT_PSHARED;
                }
+               (void)__psynch_cvclrprepost(cond, lcntval, ucntval, nsval, 0, lcntval, flags);
        }
 }
 
diff --git a/src/pthread_dependency.c b/src/pthread_dependency.c
new file mode 100644 (file)
index 0000000..282dfc3
--- /dev/null
+++ b/src/pthread_dependency.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include "resolver.h"
+#include "internal.h"
+#include "dependency_private.h"
+#include <sys/ulock.h>
+
+#define PREREQUISITE_FULFILLED  (~0u)
+
+PTHREAD_NOEXPORT
+void _pthread_dependency_fulfill_slow(pthread_dependency_t *pr, uint32_t old);
+
+OS_ALWAYS_INLINE
+static inline mach_port_t
+_pthread_dependency_self(void)
+{
+       void *v = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_MACH_THREAD_SELF);
+       return (mach_port_t)(uintptr_t)v;
+}
+
+void
+pthread_dependency_init_np(pthread_dependency_t *pr, pthread_t pth,
+               pthread_dependency_attr_t *attrs)
+{
+       // attrs is currently unused; touch it so that an invalid pointer
+       // faults here rather than later
+       if (attrs) *(volatile char *)attrs;
+       *pr = (pthread_dependency_t)PTHREAD_DEPENDENCY_INITIALIZER_NP(pth);
+}
+
+OS_NOINLINE
+void
+_pthread_dependency_fulfill_slow(pthread_dependency_t *pr, uint32_t old)
+{
+       if (old == PREREQUISITE_FULFILLED) {
+               PTHREAD_CLIENT_CRASH(0, "Fulfilling pthread_dependency_t twice");
+       }
+       if (os_unlikely(old != _pthread_dependency_self())) {
+               PTHREAD_CLIENT_CRASH(old, "Fulfilled a dependency "
+                               "not owned by current thread");
+       }
+
+       int ret = __ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &pr->__pdep_opaque1, 0);
+       switch (-ret) {
+       case 0:
+       case ENOENT:
+               return;
+       default:
+               PTHREAD_INTERNAL_CRASH(-ret, "__ulock_wake() failed");
+       }
+}
+
+
+void
+pthread_dependency_fulfill_np(pthread_dependency_t *pr, void *value)
+{
+       uint32_t old;
+
+       pr->__pdep_opaque2 = (uint64_t)(uintptr_t)value;
+       old = os_atomic_xchg(&pr->__pdep_opaque1, PREREQUISITE_FULFILLED, release);
+
+       if (old != 0) _pthread_dependency_fulfill_slow(pr, old);
+}
+
+void *
+pthread_dependency_wait_np(pthread_dependency_t *pr)
+{
+       if (os_atomic_cmpxchg(&pr->__pdep_opaque1, 0, pr->__pdep_owner, relaxed)) {
+               int ret;
+       again:
+               ret = __ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &pr->__pdep_opaque1,
+                               pr->__pdep_owner, 0);
+               switch (-ret) {
+               case EFAULT:
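+                       // the kernel could not read the word; if it still holds
+                       // the owner the dependency is genuinely unfulfilled, so
+                       // wait again, otherwise fall through and re-check below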
+                       if (pr->__pdep_opaque1 == pr->__pdep_owner) goto again;
+               case 0:
+                       break;
+               case EOWNERDEAD:
+                       PTHREAD_CLIENT_CRASH(pr->__pdep_owner, "Waiting on orphaned dependency");
+               default:
+                       PTHREAD_CLIENT_CRASH(-ret, "__ulock_wait() failed");
+               }
+       }
+
+       uint32_t cur = os_atomic_load(&pr->__pdep_opaque1, acquire);
+       if (cur == PREREQUISITE_FULFILLED) {
+               return (void *)(uintptr_t)pr->__pdep_opaque2;
+       }
+       PTHREAD_CLIENT_CRASH(cur, "Corrupted pthread_dependency_t");
+}
+
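
The intended flow, inferred from the implementation above: the owner thread is
named at init time and is the only thread allowed to fulfill, and a single
waiter blocks until the value is published. A sketch (the include path for the
SPI header is assumed):

    #include <pthread.h>
    #include <stdint.h>
    #include "dependency_private.h"    // private SPI header (path assumed)

    static pthread_dependency_t dep;

    static void *
    waiter(void *unused)
    {
            // blocks until the owner fulfills, then returns the value
            return pthread_dependency_wait_np(&dep);
    }

    int
    main(void)
    {
            pthread_t th;
            void *out;

            // the current thread is the owner: only it may fulfill, which is
            // enforced by the _pthread_dependency_self() check above
            pthread_dependency_init_np(&dep, pthread_self(), NULL);
            pthread_create(&th, NULL, waiter, NULL);

            pthread_dependency_fulfill_np(&dep, (void *)(uintptr_t)42);

            pthread_join(th, &out);    // out == (void *)42
            return 0;
    }
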
index a68503c09b2ae4414ea4eb7e65198f00f93c56dc..edc97ee38cbcdb10948eed3da81fd43d4f748084 100644 (file)
@@ -54,8 +54,6 @@
 #include "internal.h"
 #include "kern/kern_trace.h"
 
-extern int __unix_conforming;
-
 #ifndef BUILDING_VARIANT /* [ */
 
 #ifdef PLOCKSTAT
@@ -85,31 +83,73 @@ _plockstat_never_fired(void)
 
 #define PTHREAD_MUTEX_INIT_UNUSED 1
 
+PTHREAD_NOEXPORT PTHREAD_WEAK
+int _pthread_mutex_lock_init_slow(_pthread_mutex *mutex, bool trylock);
+
+PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
+int _pthread_mutex_fairshare_lock_slow(_pthread_mutex *mutex, bool trylock);
+
 PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
-int _pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock);
+int _pthread_mutex_firstfit_lock_slow(_pthread_mutex *mutex, bool trylock);
 
 PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
-int _pthread_mutex_unlock_slow(pthread_mutex_t *omutex);
+int _pthread_mutex_fairshare_unlock_slow(_pthread_mutex *mutex);
+
+PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
+int _pthread_mutex_firstfit_unlock_slow(_pthread_mutex *mutex);
 
 PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers
 int _pthread_mutex_corruption_abort(_pthread_mutex *mutex);
 
-extern int __pthread_mutex_default_policy PTHREAD_NOEXPORT;
+extern int __pthread_mutex_default_opt_policy PTHREAD_NOEXPORT;
+
+
+int __pthread_mutex_default_opt_policy PTHREAD_NOEXPORT =
+               _PTHREAD_MTX_OPT_POLICY_DEFAULT;
 
+static inline bool
+_pthread_mutex_policy_validate(int policy)
+{
+       return (policy >= 0 && policy < _PTHREAD_MUTEX_POLICY_LAST);
+}
 
-int __pthread_mutex_default_policy PTHREAD_NOEXPORT =
-               _PTHREAD_MUTEX_POLICY_FAIRSHARE;
+static inline int
+_pthread_mutex_policy_to_opt(int policy)
+{
+       switch (policy) {
+       case PTHREAD_MUTEX_POLICY_FAIRSHARE_NP:
+               return _PTHREAD_MTX_OPT_POLICY_FAIRSHARE;
+       case PTHREAD_MUTEX_POLICY_FIRSTFIT_NP:
+               return _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
+       default:
+               __builtin_unreachable();
+       }
+}
 
 PTHREAD_NOEXPORT
 void
 _pthread_mutex_global_init(const char *envp[],
                struct _pthread_registration_data *registration_data)
 {
+       int opt = _PTHREAD_MTX_OPT_POLICY_DEFAULT;
+       if (registration_data->mutex_default_policy) {
+               int policy = registration_data->mutex_default_policy;
+               if (_pthread_mutex_policy_validate(policy)) {
+                       opt = _pthread_mutex_policy_to_opt(policy);
+               }
+       }
+
        const char *envvar = _simple_getenv(envp, "PTHREAD_MUTEX_DEFAULT_POLICY");
-       if ((envvar && (envvar[0] - '0') == _PTHREAD_MUTEX_POLICY_FIRSTFIT) ||
-                       (registration_data->mutex_default_policy ==
-                               _PTHREAD_MUTEX_POLICY_FIRSTFIT)) {
-               __pthread_mutex_default_policy = _PTHREAD_MUTEX_POLICY_FIRSTFIT;
+       if (envvar) {
+               int policy = envvar[0] - '0';
+               if (_pthread_mutex_policy_validate(policy)) {
+                       opt = _pthread_mutex_policy_to_opt(policy);
+               }
+       }
+
+       if (opt != __pthread_mutex_default_opt_policy) {
+               __pthread_mutex_default_opt_policy = opt;
        }
 }
 
@@ -162,7 +202,7 @@ mutex_seq_load(mutex_seq *seqaddr, mutex_seq *oldseqval)
 #define mutex_seq_atomic_load(seqaddr, oldseqval, m) \
                mutex_seq_atomic_load_##m(seqaddr, oldseqval)
 
-PTHREAD_ALWAYS_INLINE
+PTHREAD_ALWAYS_INLINE PTHREAD_USED
 static inline bool
 mutex_seq_atomic_cmpxchgv_relaxed(mutex_seq *seqaddr, mutex_seq *oldseqval,
                mutex_seq *newseqval)
@@ -171,7 +211,7 @@ mutex_seq_atomic_cmpxchgv_relaxed(mutex_seq *seqaddr, mutex_seq *oldseqval,
                        newseqval->seq_LU, &oldseqval->seq_LU, relaxed);
 }
 
-PTHREAD_ALWAYS_INLINE
+PTHREAD_ALWAYS_INLINE PTHREAD_USED
 static inline bool
 mutex_seq_atomic_cmpxchgv_acquire(mutex_seq *seqaddr, mutex_seq *oldseqval,
                mutex_seq *newseqval)
@@ -180,7 +220,7 @@ mutex_seq_atomic_cmpxchgv_acquire(mutex_seq *seqaddr, mutex_seq *oldseqval,
                        newseqval->seq_LU, &oldseqval->seq_LU, acquire);
 }
 
-PTHREAD_ALWAYS_INLINE
+PTHREAD_ALWAYS_INLINE PTHREAD_USED
 static inline bool
 mutex_seq_atomic_cmpxchgv_release(mutex_seq *seqaddr, mutex_seq *oldseqval,
                mutex_seq *newseqval)
@@ -274,8 +314,16 @@ pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t *attr, int *policy)
 {
        int res = EINVAL;
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
-               *policy = attr->policy;
-               res = 0;
+               switch (attr->opt) {
+               case _PTHREAD_MTX_OPT_POLICY_FAIRSHARE:
+                       *policy = PTHREAD_MUTEX_POLICY_FAIRSHARE_NP;
+                       res = 0;
+                       break;
+               case _PTHREAD_MTX_OPT_POLICY_FIRSTFIT:
+                       *policy = PTHREAD_MUTEX_POLICY_FIRSTFIT_NP;
+                       res = 0;
+                       break;
+               }
        }
        return res;
 }
@@ -307,7 +355,7 @@ pthread_mutexattr_init(pthread_mutexattr_t *attr)
 {
        attr->prioceiling = _PTHREAD_DEFAULT_PRIOCEILING;
        attr->protocol = _PTHREAD_DEFAULT_PROTOCOL;
-       attr->policy = __pthread_mutex_default_policy;
+       attr->opt = __pthread_mutex_default_opt_policy;
        attr->type = PTHREAD_MUTEX_DEFAULT;
        attr->sig = _PTHREAD_MUTEX_ATTR_SIG;
        attr->pshared = _PTHREAD_DEFAULT_PSHARED;
@@ -349,12 +397,18 @@ pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *attr, int policy)
 {
        int res = EINVAL;
        if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) {
+               // <rdar://problem/35844519> the first-fit implementation was broken
+               // pre-Liberty, so this mapping exists to ensure that the old
+               // first-fit define (2) is no longer valid when used on older systems.
                switch (policy) {
-                       case _PTHREAD_MUTEX_POLICY_FAIRSHARE:
-                       case _PTHREAD_MUTEX_POLICY_FIRSTFIT:
-                               attr->policy = policy;
-                               res = 0;
-                               break;
+               case PTHREAD_MUTEX_POLICY_FAIRSHARE_NP:
+                       attr->opt = _PTHREAD_MTX_OPT_POLICY_FAIRSHARE;
+                       res = 0;
+                       break;
+               case PTHREAD_MUTEX_POLICY_FIRSTFIT_NP:
+                       attr->opt = _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
+                       res = 0;
+                       break;
                }
        }
        return res;
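
Opting a single mutex into a policy explicitly then looks like this; a sketch,
with the header location for the *_NP constants assumed:

    #include <pthread.h>
    #include <pthread_spis.h>  // PTHREAD_MUTEX_POLICY_*_NP (header assumed)

    static void
    make_firstfit_mutex(pthread_mutex_t *m)
    {
            pthread_mutexattr_t attr;

            pthread_mutexattr_init(&attr);
            // first-fit under the new numbering; the legacy value (2) is
            // deliberately rejected, per the comment above
            pthread_mutexattr_setpolicy_np(&attr,
                            PTHREAD_MUTEX_POLICY_FIRSTFIT_NP);
            pthread_mutex_init(m, &attr);
            pthread_mutexattr_destroy(&attr);
    }
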
@@ -412,6 +466,115 @@ _pthread_mutex_corruption_abort(_pthread_mutex *mutex)
 }
 
 
+PTHREAD_NOINLINE
+static int
+_pthread_mutex_check_init_slow(_pthread_mutex *mutex)
+{
+       int res = EINVAL;
+
+       if (_pthread_mutex_check_signature_init(mutex)) {
+               _PTHREAD_LOCK(mutex->lock);
+               if (_pthread_mutex_check_signature_init(mutex)) {
+                       // initialize a statically initialized mutex to provide
+                       // compatibility for misbehaving applications.
+                       // (unlock should not be the first operation on a mutex)
+                       res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf));
+               } else if (_pthread_mutex_check_signature(mutex)) {
+                       res = 0;
+               }
+               _PTHREAD_UNLOCK(mutex->lock);
+       } else if (_pthread_mutex_check_signature(mutex)) {
+               res = 0;
+       }
+       if (res != 0) {
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res);
+       }
+       return res;
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_check_init(_pthread_mutex *mutex)
+{
+       int res = 0;
+       if (!_pthread_mutex_check_signature(mutex)) {
+               return _pthread_mutex_check_init_slow(mutex);
+       }
+       return res;
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+_pthread_mutex_is_fairshare(_pthread_mutex *mutex)
+{
+       return (mutex->mtxopts.options.policy == _PTHREAD_MTX_OPT_POLICY_FAIRSHARE);
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+_pthread_mutex_is_firstfit(_pthread_mutex *mutex)
+{
+       return (mutex->mtxopts.options.policy == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT);
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+_pthread_mutex_is_recursive(_pthread_mutex *mutex)
+{
+       return (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE);
+}
+
+PTHREAD_ALWAYS_INLINE
+static int
+_pthread_mutex_lock_handle_options(_pthread_mutex *mutex, bool trylock,
+               uint64_t *tidaddr)
+{
+       if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL) {
+               // NORMAL does not do EDEADLK checking
+               return 0;
+       }
+
+       uint64_t selfid = _pthread_selfid_direct();
+       if (os_atomic_load(tidaddr, relaxed) == selfid) {
+               if (_pthread_mutex_is_recursive(mutex)) {
+                       if (mutex->mtxopts.options.lock_count < USHRT_MAX) {
+                               mutex->mtxopts.options.lock_count += 1;
+                               return mutex->mtxopts.options.lock_count;
+                       } else {
+                               return -EAGAIN;
+                       }
+               } else if (trylock) { /* PTHREAD_MUTEX_ERRORCHECK */
+                       // <rdar://problem/16261552> as per OpenGroup, trylock cannot
+                       // return EDEADLK on a deadlock, it should return EBUSY.
+                       // return EDEADLK on a deadlock; it should return EBUSY.
+               } else { /* PTHREAD_MUTEX_ERRORCHECK */
+                       return -EDEADLK;
+               }
+       }
+
+       // Not recursive, or recursive but first lock.
+       return 0;
+}
+
+PTHREAD_ALWAYS_INLINE
+static int
+_pthread_mutex_unlock_handle_options(_pthread_mutex *mutex, uint64_t *tidaddr)
+{
+       if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL) {
+               // NORMAL does not do EDEADLK checking
+               return 0;
+       }
+
+       uint64_t selfid = _pthread_selfid_direct();
+       if (os_atomic_load(tidaddr, relaxed) != selfid) {
+               return -EPERM;
+       } else if (_pthread_mutex_is_recursive(mutex) &&
+                       --mutex->mtxopts.options.lock_count) {
+               return 1;
+       }
+       return 0;
+}
+
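
The two option handlers centralize the per-type semantics. Their observable
behavior, sketched as a quick self-check:

    #include <assert.h>
    #include <errno.h>
    #include <pthread.h>

    static void
    errorcheck_semantics(void)
    {
            pthread_mutexattr_t attr;
            pthread_mutex_t m;

            pthread_mutexattr_init(&attr);
            pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
            pthread_mutex_init(&m, &attr);

            pthread_mutex_lock(&m);                     // records our tid
            assert(pthread_mutex_lock(&m) == EDEADLK);  // owner relock fails
            assert(pthread_mutex_trylock(&m) == EBUSY); // trylock -> EBUSY
            pthread_mutex_unlock(&m);
            // an unlock from a non-owner would return EPERM instead

            pthread_mutex_destroy(&m);
            pthread_mutexattr_destroy(&attr);
    }
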
 /*
  * Sequence numbers and TID:
  *
@@ -444,11 +607,9 @@ _pthread_mutex_corruption_abort(_pthread_mutex *mutex)
  */
 PTHREAD_ALWAYS_INLINE
 static inline int
-_pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp,
-               uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
+_pthread_mutex_fairshare_unlock_updatebits(_pthread_mutex *mutex,
+               uint32_t *flagsp, uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
 {
-       bool firstfit = (mutex->mtxopts.options.policy ==
-                       _PTHREAD_MUTEX_POLICY_FIRSTFIT);
        uint32_t flags = mutex->mtxopts.value;
        flags &= ~_PTHREAD_MTX_OPT_NOTIFY; // no notification by default
 
@@ -462,27 +623,24 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp,
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t oldtid, newtid;
 
-       if (mutex->mtxopts.options.type != PTHREAD_MUTEX_NORMAL) {
-               uint64_t selfid = _pthread_selfid_direct();
-               if (os_atomic_load(tidaddr, relaxed) != selfid) {
-                       PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, EPERM);
-                       return EPERM;
-               } else if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE &&
-                          --mutex->mtxopts.options.lock_count) {
-                       PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1);
-                       if (flagsp != NULL) {
-                               *flagsp = flags;
-                       }
-                       return 0;
+       int res = _pthread_mutex_unlock_handle_options(mutex, tidaddr);
+       if (res > 0) {
+               // Valid recursive unlock
+               if (flagsp) {
+                       *flagsp = flags;
                }
+               PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1);
+               return 0;
+       } else if (res < 0) {
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, -res);
+               return -res;
        }
 
-       bool clearprepost, clearnotify, spurious;
+       bool clearnotify, spurious;
        do {
                newseq = oldseq;
                oldtid = os_atomic_load(tidaddr, relaxed);
 
-               clearprepost = false;
                clearnotify = false;
                spurious = false;
 
@@ -504,13 +662,7 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp,
                                clearnotify = true;
                                newtid = 0; // clear owner
                        } else {
-                               if (firstfit) {
-                                       // reset E bit so another can acquire meanwhile
-                                       newseq.lgenval &= ~PTH_RWL_EBIT;
-                                       newtid = 0;
-                               } else {
-                                       newtid = PTHREAD_MTX_TID_SWITCHING;
-                               }
+                               newtid = PTHREAD_MTX_TID_SWITCHING;
                                // need to signal others waiting for mutex
                                flags |= _PTHREAD_MTX_OPT_NOTIFY;
                        }
@@ -530,21 +682,12 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp,
 
                if (clearnotify || spurious) {
                        flags &= ~_PTHREAD_MTX_OPT_NOTIFY;
-                       if (firstfit && (newseq.lgenval & PTH_RWL_PBIT)) {
-                               clearprepost = true;
-                               newseq.lgenval &= ~PTH_RWL_PBIT;
-                       }
                }
        } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release));
 
        PTHREAD_TRACE(psynch_mutex_unlock_updatebits, mutex, oldseq.lgenval,
                        newseq.lgenval, oldtid);
 
-       if (clearprepost) {
-               __psynch_cvclrprepost(mutex, newseq.lgenval, newseq.ugenval, 0, 0,
-                               newseq.lgenval, flags | _PTHREAD_MTX_OPT_MUTEX);
-       }
-
        if (mgenp != NULL) {
                *mgenp = newseq.lgenval;
        }
@@ -561,20 +704,11 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp,
        return 0;
 }
 
-PTHREAD_NOEXPORT PTHREAD_NOINLINE
-int
-_pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t *flagsp,
-               uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
-{
-       return _pthread_mutex_unlock_updatebits(mutex, flagsp, pmtxp, mgenp, ugenp);
-}
-
 PTHREAD_ALWAYS_INLINE
 static inline int
-_pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
+_pthread_mutex_fairshare_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
 {
-       bool firstfit = (mutex->mtxopts.options.policy ==
-                       _PTHREAD_MUTEX_POLICY_FIRSTFIT);
+       bool firstfit = _pthread_mutex_is_firstfit(mutex);
        bool gotlock = true;
 
        mutex_seq *seqaddr;
@@ -585,11 +719,9 @@ _pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
 
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
-       uint64_t oldtid;
 
        do {
                newseq = oldseq;
-               oldtid = os_atomic_load(tidaddr, relaxed);
 
                if (firstfit) {
                        // firstfit locks can have the lock stolen out from under a locker
@@ -605,17 +737,14 @@ _pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
 
                newseq.lgenval |= PTH_RWL_KBIT | PTH_RWL_EBIT;
        } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
-                       relaxed));
+                       acquire));
 
        if (gotlock) {
-               if (!os_atomic_cmpxchg(tidaddr, oldtid, selfid, relaxed)) {
-                       // we own this mutex, nobody should be updating it except us
-                       return _pthread_mutex_corruption_abort(mutex);
-               }
+               os_atomic_store(tidaddr, selfid, relaxed);
        }
 
        PTHREAD_TRACE(psynch_mutex_lock_updatebits, mutex, oldseq.lgenval,
-                       newseq.lgenval, oldtid);
+                       newseq.lgenval, 0);
 
        // failing to take the lock in firstfit returns 1 to force the caller
        // to wait in the kernel
@@ -624,114 +753,36 @@ _pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid)
 
 PTHREAD_NOINLINE
 static int
-_pthread_mutex_markprepost(_pthread_mutex *mutex, uint32_t updateval)
-{
-       mutex_seq *seqaddr;
-       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
-
-       mutex_seq oldseq, newseq;
-       mutex_seq_load(seqaddr, &oldseq);
-
-       bool clearprepost;
-       do {
-               clearprepost = false;
-               newseq = oldseq;
-
-               /* update the bits */
-               if ((oldseq.lgenval & PTHRW_COUNT_MASK) ==
-                               (oldseq.ugenval & PTHRW_COUNT_MASK)) {
-                       clearprepost = true;
-                       newseq.lgenval &= ~PTH_RWL_PBIT;
-               } else {
-                       newseq.lgenval |= PTH_RWL_PBIT;
-               }
-       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, relaxed));
-
-       if (clearprepost) {
-               __psynch_cvclrprepost(mutex, newseq.lgenval, newseq.ugenval, 0, 0,
-                               newseq.lgenval, mutex->mtxopts.value | _PTHREAD_MTX_OPT_MUTEX);
-       }
-
-       return 0;
-}
-
-PTHREAD_NOINLINE
-static int
-_pthread_mutex_check_init_slow(pthread_mutex_t *omutex)
-{
-       int res = EINVAL;
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-
-       if (_pthread_mutex_check_signature_init(mutex)) {
-               _PTHREAD_LOCK(mutex->lock);
-               if (_pthread_mutex_check_signature_init(mutex)) {
-                       // initialize a statically initialized mutex to provide
-                       // compatibility for misbehaving applications.
-                       // (unlock should not be the first operation on a mutex)
-                       res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf));
-               } else if (_pthread_mutex_check_signature(mutex)) {
-                       res = 0;
-               }
-               _PTHREAD_UNLOCK(mutex->lock);
-       } else if (_pthread_mutex_check_signature(mutex)) {
-               res = 0;
-       }
-       if (res != 0) {
-               PLOCKSTAT_MUTEX_ERROR(omutex, res);
-       }
-       return res;
-}
-
-PTHREAD_ALWAYS_INLINE
-static inline int
-_pthread_mutex_check_init(pthread_mutex_t *omutex)
-{
-       int res = 0;
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-
-       if (!_pthread_mutex_check_signature(mutex)) {
-               return _pthread_mutex_check_init_slow(omutex);
-       }
-       return res;
-}
-
-PTHREAD_NOINLINE
-static int
-_pthread_mutex_lock_wait(pthread_mutex_t *omutex, mutex_seq newseq,
+_pthread_mutex_fairshare_lock_wait(_pthread_mutex *mutex, mutex_seq newseq,
                uint64_t oldtid)
 {
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t selfid = _pthread_selfid_direct();
 
-       PLOCKSTAT_MUTEX_BLOCK(omutex);
+       PLOCKSTAT_MUTEX_BLOCK((pthread_mutex_t *)mutex);
        do {
                uint32_t updateval;
                do {
-                       updateval = __psynch_mutexwait(omutex, newseq.lgenval,
+                       updateval = __psynch_mutexwait(mutex, newseq.lgenval,
                                        newseq.ugenval, oldtid, mutex->mtxopts.value);
                        oldtid = os_atomic_load(tidaddr, relaxed);
                } while (updateval == (uint32_t)-1);
 
                // returns 0 on successful update; in firstfit it may fail with 1
-       } while (_pthread_mutex_lock_updatebits(mutex, selfid) == 1);
-       PLOCKSTAT_MUTEX_BLOCKED(omutex, BLOCK_SUCCESS_PLOCKSTAT);
+       } while (_pthread_mutex_fairshare_lock_updatebits(mutex, selfid) == 1);
+       PLOCKSTAT_MUTEX_BLOCKED((pthread_mutex_t *)mutex, BLOCK_SUCCESS_PLOCKSTAT);
 
        return 0;
 }
 
 PTHREAD_NOEXPORT PTHREAD_NOINLINE
 int
-_pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock)
+_pthread_mutex_fairshare_lock_slow(_pthread_mutex *omutex, bool trylock)
 {
        int res, recursive = 0;
        _pthread_mutex *mutex = (_pthread_mutex *)omutex;
 
-       res = _pthread_mutex_check_init(omutex);
-       if (res != 0) return res;
-
        mutex_seq *seqaddr;
        MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
 
@@ -742,25 +793,14 @@ _pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock)
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t oldtid, selfid = _pthread_selfid_direct();
 
-       if (mutex->mtxopts.options.type != PTHREAD_MUTEX_NORMAL) {
-               if (os_atomic_load(tidaddr, relaxed) == selfid) {
-                       if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) {
-                               if (mutex->mtxopts.options.lock_count < USHRT_MAX) {
-                                       mutex->mtxopts.options.lock_count++;
-                                       recursive = 1;
-                                       res = 0;
-                               } else {
-                                       res = EAGAIN;
-                               }
-                       } else if (trylock) { /* PTHREAD_MUTEX_ERRORCHECK */
-                               // <rdar://problem/16261552> as per OpenGroup, trylock cannot
-                               // return EDEADLK on a deadlock, it should return EBUSY.
-                               res = EBUSY;
-                       } else  { /* PTHREAD_MUTEX_ERRORCHECK */
-                               res = EDEADLK;
-                       }
-                       goto out;
-               }
+       res = _pthread_mutex_lock_handle_options(mutex, trylock, tidaddr);
+       if (res > 0) {
+               recursive = 1;
+               res = 0;
+               goto out;
+       } else if (res < 0) {
+               res = -res;
+               goto out;
        }
 
        bool gotlock;
@@ -797,44 +837,39 @@ _pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock)
        } else {
                PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_START, omutex,
                                newseq.lgenval, newseq.ugenval, oldtid);
-               res = _pthread_mutex_lock_wait(omutex, newseq, oldtid);
+               res = _pthread_mutex_fairshare_lock_wait(mutex, newseq, oldtid);
                PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_END, omutex,
                                newseq.lgenval, newseq.ugenval, oldtid);
        }
 
-       if (res == 0 && mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) {
+       if (res == 0 && _pthread_mutex_is_recursive(mutex)) {
                mutex->mtxopts.options.lock_count = 1;
        }
 
 out:
 #if PLOCKSTAT
        if (res == 0) {
-               PLOCKSTAT_MUTEX_ACQUIRE(omutex, recursive, 0);
+               PLOCKSTAT_MUTEX_ACQUIRE((pthread_mutex_t *)mutex, recursive, 0);
        } else {
-               PLOCKSTAT_MUTEX_ERROR(omutex, res);
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res);
        }
 #endif
 
        return res;
 }
 
-PTHREAD_ALWAYS_INLINE
+PTHREAD_NOINLINE
 static inline int
-_pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock)
+_pthread_mutex_fairshare_lock(_pthread_mutex *mutex, bool trylock)
 {
 #if ENABLE_USERSPACE_TRACE
-       return _pthread_mutex_lock_slow(omutex, trylock);
+       return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
 #elif PLOCKSTAT
        if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
-               return _pthread_mutex_lock_slow(omutex, trylock);
+               return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
        }
 #endif
 
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
-               return _pthread_mutex_lock_slow(omutex, trylock);
-       }
-
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
        uint64_t selfid = _pthread_selfid_direct();
@@ -846,7 +881,7 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock)
        mutex_seq_load(seqaddr, &oldseq);
 
        if (os_unlikely(oldseq.lgenval & PTH_RWL_EBIT)) {
-               return _pthread_mutex_lock_slow(omutex, trylock);
+               return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
        }
 
        bool gotlock;
@@ -865,7 +900,7 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock)
                        newseq.lgenval += PTHRW_INC;
                        newseq.lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT;
                } else {
-                       return _pthread_mutex_lock_slow(omutex, trylock);
+                       return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
                }
        } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
                        acquire)));
@@ -880,45 +915,24 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock)
        }
 }
 
-PTHREAD_NOEXPORT_VARIANT
-int
-pthread_mutex_lock(pthread_mutex_t *mutex)
-{
-       return _pthread_mutex_lock(mutex, false);
-}
-
-PTHREAD_NOEXPORT_VARIANT
-int
-pthread_mutex_trylock(pthread_mutex_t *mutex)
-{
-       return _pthread_mutex_lock(mutex, true);
-}
-
-/*
- * Unlock a mutex.
- * TODO: Priority inheritance stuff
- */
-
 PTHREAD_NOINLINE
 static int
-_pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq,
+_pthread_mutex_fairshare_unlock_drop(_pthread_mutex *mutex, mutex_seq newseq,
                uint32_t flags)
 {
        int res;
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-
        uint32_t updateval;
 
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
 
-       PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_START, omutex, newseq.lgenval,
+       PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_START, mutex, newseq.lgenval,
                        newseq.ugenval, os_atomic_load(tidaddr, relaxed));
 
-       updateval = __psynch_mutexdrop(omutex, newseq.lgenval, newseq.ugenval,
+       updateval = __psynch_mutexdrop(mutex, newseq.lgenval, newseq.ugenval,
                        os_atomic_load(tidaddr, relaxed), flags);
 
-       PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_END, omutex, updateval, 0, 0);
+       PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_END, mutex, updateval, 0, 0);
 
        if (updateval == (uint32_t)-1) {
                res = errno;
@@ -930,9 +944,6 @@ _pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq,
                        PTHREAD_ABORT("__psynch_mutexdrop failed with error %d", res);
                }
                return res;
-       } else if ((mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT)
-                       && (updateval & PTH_RWL_PBIT)) {
-               return _pthread_mutex_markprepost(mutex, updateval);
        }
 
        return 0;
@@ -940,49 +951,39 @@ _pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq,
 
 PTHREAD_NOEXPORT PTHREAD_NOINLINE
 int
-_pthread_mutex_unlock_slow(pthread_mutex_t *omutex)
+_pthread_mutex_fairshare_unlock_slow(_pthread_mutex *mutex)
 {
        int res;
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
        mutex_seq newseq;
        uint32_t flags;
 
-       // Initialize static mutexes for compatibility with misbehaving
-       // applications (unlock should not be the first operation on a mutex).
-       res = _pthread_mutex_check_init(omutex);
-       if (res != 0) return res;
-
-       res = _pthread_mutex_unlock_updatebits(mutex, &flags, NULL, &newseq.lgenval,
-                       &newseq.ugenval);
+       res = _pthread_mutex_fairshare_unlock_updatebits(mutex, &flags, NULL,
+                       &newseq.lgenval, &newseq.ugenval);
        if (res != 0) return res;
 
        if ((flags & _PTHREAD_MTX_OPT_NOTIFY) != 0) {
-               return _pthread_mutex_unlock_drop(omutex, newseq, flags);
+               return _pthread_mutex_fairshare_unlock_drop(mutex, newseq, flags);
        } else {
                uint64_t *tidaddr;
                MUTEX_GETTID_ADDR(mutex, &tidaddr);
-               PTHREAD_TRACE(psynch_mutex_uunlock, omutex, newseq.lgenval,
+               PTHREAD_TRACE(psynch_mutex_uunlock, mutex, newseq.lgenval,
                                newseq.ugenval, os_atomic_load(tidaddr, relaxed));
        }
 
        return 0;
 }
 
-PTHREAD_NOEXPORT_VARIANT
-int
-pthread_mutex_unlock(pthread_mutex_t *omutex)
+PTHREAD_NOINLINE
+static int
+_pthread_mutex_fairshare_unlock(_pthread_mutex *mutex)
 {
 #if ENABLE_USERSPACE_TRACE
-       return _pthread_mutex_unlock_slow(omutex);
+       return _pthread_mutex_fairshare_unlock_slow(mutex);
 #elif PLOCKSTAT
        if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
-               return _pthread_mutex_unlock_slow(omutex);
+               return _pthread_mutex_fairshare_unlock_slow(mutex);
        }
 #endif
-       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
-       if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
-               return _pthread_mutex_unlock_slow(omutex);
-       }
 
        uint64_t *tidaddr;
        MUTEX_GETTID_ADDR(mutex, &tidaddr);
@@ -1012,13 +1013,15 @@ pthread_mutex_unlock(pthread_mutex_t *omutex)
 
                if (os_likely((oldseq.lgenval & PTHRW_COUNT_MASK) ==
                                (newseq.ugenval & PTHRW_COUNT_MASK))) {
-                       // our unlock sequence matches to lock sequence, so if the
-                       // CAS is successful, the mutex is unlocked
+                       // if the CAS succeeds, we can be sure of a fast-path
+                       // (CAS-only) unlock, provided:
+                       //   a. our lock and unlock sequences are equal, and
+                       //   b. we don't need to clear an unlock prepost from the kernel
 
                        // do not reset Ibit, just K&E
                        newseq.lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT);
                } else {
-                       return _pthread_mutex_unlock_slow(omutex);
+                       return _pthread_mutex_fairshare_unlock_slow(mutex);
                }
        } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
                        release)));
@@ -1026,6 +1029,468 @@ pthread_mutex_unlock(pthread_mutex_t *omutex)
        return 0;
 }
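+
+/*
+ * Worked example of the fast-path check above (illustrative, assuming the
+ * PTHRW_* sequence encoding from synch_internal.h): in the fairshare
+ * protocol every lock takes a ticket by adding PTHRW_INC to lgenval and
+ * every unlock pays one back on ugenval. If the masked counts match after
+ * our increment, no waiter is parked in the kernel and the release CAS
+ * clearing K&E completes the unlock; any mismatch forces the slow path,
+ * which notifies a waiter via __psynch_mutexdrop().
+ */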
 
+#pragma mark firstfit
+
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_firstfit_unlock_updatebits(_pthread_mutex *mutex,
+               uint32_t *flagsp, uint32_t **mutexp, uint32_t *lvalp, uint32_t *uvalp)
+{
+       uint32_t flags = mutex->mtxopts.value & ~_PTHREAD_MTX_OPT_NOTIFY;
+       bool kernel_wake;
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t oldtid;
+
+       int res = _pthread_mutex_unlock_handle_options(mutex, tidaddr);
+       if (res > 0) {
+               // Valid recursive unlock
+               if (flagsp) {
+                       *flagsp = flags;
+               }
+               PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1);
+               return 0;
+       } else if (res < 0) {
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, -res);
+               return -res;
+       }
+
+       do {
+               newseq = oldseq;
+               oldtid = os_atomic_load(tidaddr, relaxed);
+               // One or more kernel waiters means we need to do a wake.
+               kernel_wake = diff_genseq(oldseq.lgenval, oldseq.ugenval) > 0;
+               newseq.lgenval &= ~PTH_RWL_EBIT;
+
+               if (kernel_wake) {
+                       // Going to the kernel post-unlock consumes a single
+                       // waiter's unlock from the mutex counts.
+                       newseq.ugenval += PTHRW_INC;
+               }
+
+               if (oldtid != 0) {
+                       if (!os_atomic_cmpxchg(tidaddr, oldtid, 0, relaxed)) {
+                               return _pthread_mutex_corruption_abort(mutex);
+                       }
+               }
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release));
+
+       PTHREAD_TRACE(psynch_ffmutex_unlock_updatebits, mutex, oldseq.lgenval,
+                       newseq.lgenval, newseq.ugenval);
+
+       if (kernel_wake) {
+               // We choose to return this out via flags because the condition
+               // variable also uses this to determine whether to do a kernel wake
+               // when beginning a cvwait.
+               flags |= _PTHREAD_MTX_OPT_NOTIFY;
+       }
+       if (lvalp) {
+               *lvalp = newseq.lgenval;
+       }
+       if (uvalp) {
+               *uvalp = newseq.ugenval;
+       }
+       if (mutexp) {
+               *mutexp = (uint32_t *)mutex;
+       }
+       if (flagsp) {
+               *flagsp = flags;
+       }
+       return 0;
+}
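+
+/*
+ * Worked example (illustrative, assuming the PTHRW_* encoding): if two
+ * waiters are parked in the kernel, lgenval's count leads ugenval's by
+ * 2 * PTHRW_INC, so diff_genseq() > 0. The unlock above then clears the
+ * E-bit, consumes one waiter by adding PTHRW_INC to ugenval, and raises
+ * _PTHREAD_MTX_OPT_NOTIFY in the returned flags so that the caller issues
+ * a __psynch_mutexdrop() wake.
+ */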
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+static int
+_pthread_mutex_firstfit_wake(_pthread_mutex *mutex, mutex_seq newseq,
+               uint32_t flags)
+{
+       PTHREAD_TRACE(psynch_ffmutex_wake, mutex, newseq.lgenval, newseq.ugenval,
+                       0);
+       int res = __psynch_mutexdrop(mutex, newseq.lgenval, newseq.ugenval, 0,
+                       flags);
+
+       if (res == -1) {
+               res = errno;
+               if (res == EINTR) {
+                       res = 0;
+               }
+               if (res != 0) {
+                       PTHREAD_ABORT("__psynch_mutexdrop failed with error %d", res);
+               }
+               return res;
+       }
+       return 0;
+}
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_firstfit_unlock_slow(_pthread_mutex *mutex)
+{
+       mutex_seq newseq;
+       uint32_t flags;
+       int res;
+
+       res = _pthread_mutex_firstfit_unlock_updatebits(mutex, &flags, NULL,
+                       &newseq.lgenval, &newseq.ugenval);
+       if (res != 0) return res;
+
+       if (flags & _PTHREAD_MTX_OPT_NOTIFY) {
+               return _pthread_mutex_firstfit_wake(mutex, newseq, flags);
+       }
+       return 0;
+}
+
+PTHREAD_ALWAYS_INLINE
+static bool
+_pthread_mutex_firstfit_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid,
+               mutex_seq *newseqp)
+{
+       bool gotlock;
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+
+       PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_START, mutex,
+                       oldseq.lgenval, oldseq.ugenval, 0);
+
+       do {
+               newseq = oldseq;
+               gotlock = is_rwl_ebit_clear(oldseq.lgenval);
+
+               if (gotlock) {
+                       // If we see the E-bit cleared, we should just attempt to take it.
+                       newseq.lgenval |= PTH_RWL_EBIT;
+               } else {
+                       // If we failed to get the lock then we need to put ourselves back
+                       // in the queue of waiters. The previous unlocker that woke us out
+                       // of the kernel consumed the S-count for our previous wake. So
+                       // take another ticket on L and go back in the kernel to sleep.
+                       newseq.lgenval += PTHRW_INC;
+               }
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire));
+
+       if (gotlock) {
+               os_atomic_store(tidaddr, selfid, relaxed);
+       }
+
+       PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_END, mutex,
+                       newseq.lgenval, newseq.ugenval, 0);
+
+       if (newseqp) {
+               *newseqp = newseq;
+       }
+       return gotlock;
+}
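+
+/*
+ * Illustrative walkthrough: a waiter woken from __psynch_mutexwait() runs
+ * the updatebits loop above. If the E-bit is clear, the acquire CAS sets
+ * it and the waiter owns the lock. If another thread stole the lock in
+ * that window, the waiter re-arms by taking a fresh ticket
+ * (lgenval += PTHRW_INC) and returns false, which sends the caller back
+ * to sleep in the kernel.
+ */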
+
+PTHREAD_NOINLINE
+static int
+_pthread_mutex_firstfit_lock_wait(_pthread_mutex *mutex, mutex_seq newseq,
+               uint64_t oldtid)
+{
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t selfid = _pthread_selfid_direct();
+
+       PLOCKSTAT_MUTEX_BLOCK((pthread_mutex_t *)mutex);
+       do {
+               uint32_t uval;
+               do {
+                       PTHREAD_TRACE(psynch_ffmutex_wait | DBG_FUNC_START, mutex,
+                                       newseq.lgenval, newseq.ugenval, mutex->mtxopts.value);
+                       uval = __psynch_mutexwait(mutex, newseq.lgenval, newseq.ugenval,
+                                       oldtid, mutex->mtxopts.value);
+                       PTHREAD_TRACE(psynch_ffmutex_wait | DBG_FUNC_END, mutex,
+                                       uval, 0, 0);
+                       oldtid = os_atomic_load(tidaddr, relaxed);
+               } while (uval == (uint32_t)-1);
+       } while (!_pthread_mutex_firstfit_lock_updatebits(mutex, selfid, &newseq));
+       PLOCKSTAT_MUTEX_BLOCKED((pthread_mutex_t *)mutex, BLOCK_SUCCESS_PLOCKSTAT);
+
+       return 0;
+}
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_firstfit_lock_slow(_pthread_mutex *mutex, bool trylock)
+{
+       int res, recursive = 0;
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t oldtid, selfid = _pthread_selfid_direct();
+
+       res = _pthread_mutex_lock_handle_options(mutex, trylock, tidaddr);
+       if (res > 0) {
+               recursive = 1;
+               res = 0;
+               goto out;
+       } else if (res < 0) {
+               res = -res;
+               goto out;
+       }
+
+       PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_START, mutex,
+                       oldseq.lgenval, oldseq.ugenval, 0);
+
+       bool gotlock;
+       do {
+               newseq = oldseq;
+               oldtid = os_atomic_load(tidaddr, relaxed);
+
+               gotlock = is_rwl_ebit_clear(oldseq.lgenval);
+               if (trylock && !gotlock) {
+                       // We still want to perform the CAS here, even though it
+                       // won't change anything, so that it fails if someone
+                       // unlocked while we were in the loop.
+               } else if (gotlock) {
+                       // In first-fit, getting the lock simply adds the E-bit
+                       newseq.lgenval |= PTH_RWL_EBIT;
+               } else {
+                       // Failed to get the lock, increment the L-val and go to
+                       // the kernel to sleep
+                       newseq.lgenval += PTHRW_INC;
+               }
+       } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire));
+
+       PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_END, mutex,
+                       newseq.lgenval, newseq.ugenval, 0);
+
+       if (gotlock) {
+               os_atomic_store(tidaddr, selfid, relaxed);
+               res = 0;
+               PTHREAD_TRACE(psynch_mutex_ulock, mutex, newseq.lgenval,
+                               newseq.ugenval, selfid);
+       } else if (trylock) {
+               res = EBUSY;
+               PTHREAD_TRACE(psynch_mutex_utrylock_failed, mutex, newseq.lgenval,
+                               newseq.ugenval, oldtid);
+       } else {
+               PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_START, mutex,
+                               newseq.lgenval, newseq.ugenval, oldtid);
+               res = _pthread_mutex_firstfit_lock_wait(mutex, newseq, oldtid);
+               PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_END, mutex,
+                               newseq.lgenval, newseq.ugenval, oldtid);
+       }
+
+       if (res == 0 && _pthread_mutex_is_recursive(mutex)) {
+               mutex->mtxopts.options.lock_count = 1;
+       }
+
+out:
+#if PLOCKSTAT
+       if (res == 0) {
+               PLOCKSTAT_MUTEX_ACQUIRE((pthread_mutex_t *)mutex, recursive, 0);
+       } else {
+               PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res);
+       }
+#endif
+       return res;
+}
+
+#pragma mark fast path
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t *flagsp,
+               uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp)
+{
+       if (_pthread_mutex_is_fairshare(mutex)) {
+               return _pthread_mutex_fairshare_unlock_updatebits(mutex, flagsp,
+                               pmtxp, mgenp, ugenp);
+       }
+       return _pthread_mutex_firstfit_unlock_updatebits(mutex, flagsp, pmtxp,
+                       mgenp, ugenp);
+}
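+
+/*
+ * Informal note: _pthread_mutex_droplock() is the entry point the
+ * condition-variable code uses when beginning a cvwait. The policy check
+ * above (mtxopts.options.policy, presumably
+ * _PTHREAD_MTX_OPT_POLICY_FAIRSHARE vs _PTHREAD_MTX_OPT_POLICY_FIRSTFIT)
+ * routes it to the matching unlock-updatebits implementation.
+ */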
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+int
+_pthread_mutex_lock_init_slow(_pthread_mutex *mutex, bool trylock)
+{
+       int res;
+
+       res = _pthread_mutex_check_init(mutex);
+       if (res != 0) return res;
+
+       if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+               return _pthread_mutex_fairshare_lock_slow(mutex, trylock);
+       }
+       return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+}
+
+PTHREAD_NOEXPORT PTHREAD_NOINLINE
+static int
+_pthread_mutex_unlock_init_slow(_pthread_mutex *mutex)
+{
+       int res;
+
+       // Initialize static mutexes for compatibility with misbehaving
+       // applications (unlock should not be the first operation on a mutex).
+       res = _pthread_mutex_check_init(mutex);
+       if (res != 0) return res;
+
+       if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+               return _pthread_mutex_fairshare_unlock_slow(mutex);
+       }
+       return _pthread_mutex_firstfit_unlock_slow(mutex);
+}
+
+PTHREAD_NOEXPORT_VARIANT
+int
+pthread_mutex_unlock(pthread_mutex_t *omutex)
+{
+       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
+       if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
+               return _pthread_mutex_unlock_init_slow(mutex);
+       }
+
+       if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+               return _pthread_mutex_fairshare_unlock(mutex);
+       }
+
+#if ENABLE_USERSPACE_TRACE
+       return _pthread_mutex_firstfit_unlock_slow(mutex);
+#elif PLOCKSTAT
+       if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
+               return _pthread_mutex_firstfit_unlock_slow(mutex);
+       }
+#endif
+
+       /*
+        * This is the first-fit fast path. The fairshare fast-ish path is in
+        * _pthread_mutex_fairshare_unlock().
+        */
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       // We're giving up the mutex one way or the other, so go ahead and
+       // update the owner to 0 so that once the CAS below succeeds, there
+       // is no stale ownership information. If the CAS of the seqaddr
+       // fails, we may loop, but it's still valid for the owner to be
+       // SWITCHING/0
+       os_atomic_store(tidaddr, 0, relaxed);
+
+       do {
+               newseq = oldseq;
+
+               if (diff_genseq(oldseq.lgenval, oldseq.ugenval) == 0) {
+                       // No outstanding waiters in kernel, we can simply drop the E-bit
+                       // and return.
+                       newseq.lgenval &= ~PTH_RWL_EBIT;
+               } else {
+                       return _pthread_mutex_firstfit_unlock_slow(mutex);
+               }
+       } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+                       release)));
+
+       return 0;
+}
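+
+/*
+ * Ordering note (informal): the release CAS above pairs with the acquire
+ * CAS in the lock fast path, so writes made while holding the mutex are
+ * visible to the next owner. The tid store can stay relaxed because the
+ * seqaddr CAS is what publishes the unlock; a transient owner of 0 is
+ * explicitly tolerated, as noted above.
+ */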
+
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_firstfit_lock(pthread_mutex_t *omutex, bool trylock)
+{
+       _pthread_mutex *mutex = (_pthread_mutex *)omutex;
+       if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
+               return _pthread_mutex_lock_init_slow(mutex, trylock);
+       }
+
+       if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+               return _pthread_mutex_fairshare_lock(mutex, trylock);
+       }
+
+#if ENABLE_USERSPACE_TRACE
+       return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+#elif PLOCKSTAT
+       if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
+               return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+       }
+#endif
+
+       /*
+        * This is the first-fit fast path. The fairshare fast-ish path is in
+        * _pthread_mutex_fairshare_lock().
+        */
+       uint64_t *tidaddr;
+       MUTEX_GETTID_ADDR(mutex, &tidaddr);
+       uint64_t selfid = _pthread_selfid_direct();
+
+       mutex_seq *seqaddr;
+       MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+       mutex_seq oldseq, newseq;
+       mutex_seq_load(seqaddr, &oldseq);
+
+       if (os_unlikely(oldseq.lgenval & PTH_RWL_EBIT)) {
+               return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+       }
+
+       bool gotlock;
+       do {
+               newseq = oldseq;
+               gotlock = is_rwl_ebit_clear(oldseq.lgenval);
+
+               if (trylock && !gotlock) {
+                       // A trylock on a held lock will fail immediately. But since
+                       // we did not load the sequence words atomically, perform a
+                       // no-op CAS64 to ensure that nobody has unlocked concurrently.
+               } else if (os_likely(gotlock)) {
+                       // In first-fit, getting the lock simply adds the E-bit
+                       newseq.lgenval |= PTH_RWL_EBIT;
+               } else {
+                       return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+               }
+       } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+                       acquire)));
+
+       if (os_likely(gotlock)) {
+               os_atomic_store(tidaddr, selfid, relaxed);
+               return 0;
+       } else if (trylock) {
+               return EBUSY;
+       } else {
+               __builtin_trap();
+       }
+}
+
+PTHREAD_NOEXPORT_VARIANT
+int
+pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+       return _pthread_mutex_firstfit_lock(mutex, false);
+}
+
+PTHREAD_NOEXPORT_VARIANT
+int
+pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+       return _pthread_mutex_firstfit_lock(mutex, true);
+}
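+
+/*
+ * Usage sketch (illustrative; assumes the pthread_mutexattr_setpolicy_np()
+ * SPI from pthread_spis.h): plain mutexes route through the first-fit path
+ * above, so exercising the fairshare paths means opting in explicitly:
+ *
+ *     pthread_mutexattr_t attr;
+ *     pthread_mutexattr_init(&attr);
+ *     pthread_mutexattr_setpolicy_np(&attr, PTHREAD_MUTEX_POLICY_FAIRSHARE_NP);
+ *     pthread_mutex_t m;
+ *     pthread_mutex_init(&m, &attr);      // lock/unlock now take the
+ *                                         // _pthread_mutex_fairshare_* paths
+ *     pthread_mutexattr_destroy(&attr);
+ */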
+
 
 PTHREAD_ALWAYS_INLINE
 static inline int
@@ -1040,7 +1505,7 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr,
                }
                mutex->prioceiling = (int16_t)attr->prioceiling;
                mutex->mtxopts.options.protocol = attr->protocol;
-               mutex->mtxopts.options.policy = attr->policy;
+               mutex->mtxopts.options.policy = attr->opt;
                mutex->mtxopts.options.type = attr->type;
                mutex->mtxopts.options.pshared = attr->pshared;
        } else {
@@ -1063,9 +1528,9 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr,
                mutex->prioceiling = _PTHREAD_DEFAULT_PRIOCEILING;
                mutex->mtxopts.options.protocol = _PTHREAD_DEFAULT_PROTOCOL;
                if (static_type != 3) {
-                       mutex->mtxopts.options.policy = __pthread_mutex_default_policy;
+                       mutex->mtxopts.options.policy = __pthread_mutex_default_opt_policy;
                } else {
-                       mutex->mtxopts.options.policy = _PTHREAD_MUTEX_POLICY_FIRSTFIT;
+                       mutex->mtxopts.options.policy = _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
                }
                mutex->mtxopts.options.pshared = _PTHREAD_DEFAULT_PSHARED;
        }
@@ -1089,7 +1554,8 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr,
 
        long sig = _PTHREAD_MUTEX_SIG;
        if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL &&
-                       mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FAIRSHARE) {
+                       (_pthread_mutex_is_fairshare(mutex) ||
+                        _pthread_mutex_is_firstfit(mutex))) {
                // rdar://18148854 _pthread_mutex_lock & pthread_mutex_unlock fastpath
                sig = _PTHREAD_MUTEX_SIG_fast;
        }
index 85358df6e7fb8d91376f6fcf63ce1e98411d2612..5b0bc9a70916ccf5a155683294dcbed23f0fd301 100644 (file)
@@ -61,8 +61,6 @@
 #include <platform/compat.h> // for bzero
 #endif
 
-extern int __unix_conforming;
-
 #ifdef PLOCKSTAT
 #include "plockstat.h"
 #else /* !PLOCKSTAT */
@@ -513,7 +511,7 @@ _pthread_rwlock_updateval(_pthread_rwlock *rwlock, uint32_t updateval)
        rwlock_seq_load(seqaddr, &oldseq, RWLOCK_SEQ_LS);
        do {
                newseq = oldseq;
-               if (isoverlap || is_rws_setunlockinit(oldseq.rw_seq) != 0) {
+               if (isoverlap || is_rws_unlockinit_set(oldseq.rw_seq)) {
                        // Set S word to the specified value
                        uint32_t savebits = (oldseq.rw_seq & PTHRW_RWS_SAVEMASK);
                        newseq.lcntval = _pthread_rwlock_modbits(oldseq.lcntval, updateval,
@@ -763,7 +761,7 @@ retry:
                                newseq.lcntval |= PTH_RWL_KBIT | PTH_RWL_WBIT;
                        }
                        newseq.lcntval += PTHRW_INC;
-                       if (is_rws_setseq(oldseq.rw_seq)) {
+                       if (is_rws_sbit_set(oldseq.rw_seq)) {
                                // Clear the S bit and set S to L
                                newseq.rw_seq &= (PTHRW_BIT_MASK & ~PTH_RWS_SBIT);
                                newseq.rw_seq |= (oldseq.lcntval & PTHRW_COUNT_MASK);
index 3a772668ee81d3ae292dd0afe4cdb94da7bf4e68..54b1bb020d060eedc48e723f994af6a0c49e81e1 100644 (file)
 // __pthread_tsd_end is the end of dynamic keys.
 
 static const int __pthread_tsd_first = __TSD_RESERVED_MAX + 1;
-static int __pthread_tsd_max = __pthread_tsd_first;
 static const int __pthread_tsd_start = _INTERNAL_POSIX_THREAD_KEYS_MAX;
 static const int __pthread_tsd_end = _INTERNAL_POSIX_THREAD_KEYS_END;
 
-static int __pthread_key_legacy_behaviour = 0;
-static int __pthread_key_legacy_behaviour_log = 0;
+static int __pthread_tsd_max = __pthread_tsd_first;
+static _pthread_lock __pthread_tsd_lock = _PTHREAD_LOCK_INITIALIZER;
+static bool __pthread_key_legacy_behaviour = 0;
+static bool __pthread_key_legacy_behaviour_log = 0;
 
 // Omit support for pthread key destructors in the static archive for dyld.
 // dyld does not create and destroy threads so these are not necessary.
@@ -80,15 +81,17 @@ static struct {
        uintptr_t destructor;
 } _pthread_keys[_INTERNAL_POSIX_THREAD_KEYS_END];
 
-static _pthread_lock tsd_lock = _PTHREAD_LOCK_INITIALIZER;
-
 // The pthread_tsd destruction order can be reverted to the old (pre-10.11) order
 // by setting this environment variable.
 void
 _pthread_key_global_init(const char *envp[])
 {
-       __pthread_key_legacy_behaviour = _simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER") ? 1 : 0;
-       __pthread_key_legacy_behaviour_log = _simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER_LOG") ? 1 : 0;
+       if (_simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER")) {
+               __pthread_key_legacy_behaviour = true;
+       }
+       if (_simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER_LOG")) {
+               __pthread_key_legacy_behaviour_log = true;
+       }
 }
 
 // Returns true if successful, false if destructor was already set.
@@ -133,7 +136,7 @@ pthread_key_create(pthread_key_t *key, void (*destructor)(void *))
        int res = EAGAIN; // Returns EAGAIN if key cannot be allocated.
        pthread_key_t k;
 
-       _PTHREAD_LOCK(tsd_lock);
+       _PTHREAD_LOCK(__pthread_tsd_lock);
        for (k = __pthread_tsd_start; k < __pthread_tsd_end; k++) {
                if (_pthread_key_set_destructor(k, destructor)) {
                        *key = k;
@@ -141,7 +144,7 @@ pthread_key_create(pthread_key_t *key, void (*destructor)(void *))
                        break;
                }
        }
-       _PTHREAD_UNLOCK(tsd_lock);
+       _PTHREAD_UNLOCK(__pthread_tsd_lock);
 
        return res;
 }
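+
+/*
+ * Usage sketch (standard POSIX API, shown for context): the loop above
+ * hands out keys from [__pthread_tsd_start, __pthread_tsd_end) under
+ * __pthread_tsd_lock:
+ *
+ *     static pthread_key_t key;
+ *     pthread_key_create(&key, free);        // reserves a dynamic slot
+ *     pthread_setspecific(key, malloc(16));  // destructor frees it at thread exit
+ */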
@@ -151,12 +154,12 @@ pthread_key_delete(pthread_key_t key)
 {
        int res = EINVAL; // Returns EINVAL if key is not allocated.
 
-       _PTHREAD_LOCK(tsd_lock);
+       _PTHREAD_LOCK(__pthread_tsd_lock);
        if (key >= __pthread_tsd_start && key < __pthread_tsd_end) {
                if (_pthread_key_unset_destructor(key)) {
                        struct _pthread *p;
                        _PTHREAD_LOCK(_pthread_list_lock);
-                       TAILQ_FOREACH(p, &__pthread_head, plist) {
+                       TAILQ_FOREACH(p, &__pthread_head, tl_plist) {
                                // No lock for word-sized write.
                                p->tsd[key] = 0;
                        }
@@ -164,7 +167,7 @@ pthread_key_delete(pthread_key_t key)
                        res = 0;
                }
        }
-       _PTHREAD_UNLOCK(tsd_lock);
+       _PTHREAD_UNLOCK(__pthread_tsd_lock);
 
        return res;
 }
@@ -188,7 +191,7 @@ pthread_setspecific(pthread_key_t key, const void *value)
                                _pthread_key_set_destructor(key, NULL);
                        }
                        if (key > self->max_tsd_key) {
-                               self->max_tsd_key = (int)key;
+                               self->max_tsd_key = (uint16_t)key;
                        }
                }
        }
@@ -342,12 +345,12 @@ pthread_key_init_np(int key, void (*destructor)(void *))
 {
        int res = EINVAL; // Returns EINVAL if key is out of range.
        if (key >= __pthread_tsd_first && key < __pthread_tsd_start) {
-               _PTHREAD_LOCK(tsd_lock);
+               _PTHREAD_LOCK(__pthread_tsd_lock);
                _pthread_key_set_destructor(key, destructor);
                if (key > __pthread_tsd_max) {
                        __pthread_tsd_max = key;
                }
-               _PTHREAD_UNLOCK(tsd_lock);
+               _PTHREAD_UNLOCK(__pthread_tsd_lock);
                res = 0;
        }
        return res;
index b31098a3d825a2a24584f1a8062e12be1647340c..ef360896e694091bf3b2d021a3132c6d26d53d7e 100644 (file)
--- a/src/qos.c
+++ b/src/qos.c
@@ -35,8 +35,6 @@
 #include "workqueue_private.h"
 #include "qos_private.h"
 
-static pthread_priority_t _main_qos = QOS_CLASS_UNSPECIFIED;
-
 #define PTHREAD_OVERRIDE_SIGNATURE     (0x6f766572)
 #define PTHREAD_OVERRIDE_SIG_DEAD      (0x7265766f)
 
@@ -49,172 +47,145 @@ struct pthread_override_s
        bool malloced;
 };
 
-void
-_pthread_set_main_qos(pthread_priority_t qos)
+thread_qos_t
+_pthread_qos_class_to_thread_qos(qos_class_t qos)
 {
-       _main_qos = qos;
+       switch (qos) {
+       case QOS_CLASS_USER_INTERACTIVE: return THREAD_QOS_USER_INTERACTIVE;
+       case QOS_CLASS_USER_INITIATED: return THREAD_QOS_USER_INITIATED;
+       case QOS_CLASS_DEFAULT: return THREAD_QOS_LEGACY;
+       case QOS_CLASS_UTILITY: return THREAD_QOS_UTILITY;
+       case QOS_CLASS_BACKGROUND: return THREAD_QOS_BACKGROUND;
+       case QOS_CLASS_MAINTENANCE: return THREAD_QOS_MAINTENANCE;
+       default: return THREAD_QOS_UNSPECIFIED;
+       }
 }
 
-int
-pthread_attr_set_qos_class_np(pthread_attr_t *__attr,
-                                                         qos_class_t __qos_class,
-                                                         int __relative_priority)
-{
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return ENOTSUP;
-       }
+static inline qos_class_t
+_pthread_qos_class_from_thread_qos(thread_qos_t tqos)
+{
+       static const qos_class_t thread_qos_to_qos_class[THREAD_QOS_LAST] = {
+               [THREAD_QOS_UNSPECIFIED]      = QOS_CLASS_UNSPECIFIED,
+               [THREAD_QOS_MAINTENANCE]      = QOS_CLASS_MAINTENANCE,
+               [THREAD_QOS_BACKGROUND]       = QOS_CLASS_BACKGROUND,
+               [THREAD_QOS_UTILITY]          = QOS_CLASS_UTILITY,
+               [THREAD_QOS_LEGACY]           = QOS_CLASS_DEFAULT,
+               [THREAD_QOS_USER_INITIATED]   = QOS_CLASS_USER_INITIATED,
+               [THREAD_QOS_USER_INTERACTIVE] = QOS_CLASS_USER_INTERACTIVE,
+       };
+       if (os_unlikely(tqos >= THREAD_QOS_LAST)) return QOS_CLASS_UNSPECIFIED;
+       return thread_qos_to_qos_class[tqos];
+}
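+
+/*
+ * Round-trip example (illustrative): the two mappings above are inverses
+ * on the valid range, e.g.
+ *
+ *     _pthread_qos_class_from_thread_qos(
+ *                     _pthread_qos_class_to_thread_qos(QOS_CLASS_UTILITY))
+ *             == QOS_CLASS_UTILITY
+ *
+ * while anything out of range collapses to the UNSPECIFIED values.
+ */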
 
-       if (__relative_priority > 0 || __relative_priority < QOS_MIN_RELATIVE_PRIORITY) {
-               return EINVAL;
+static inline thread_qos_t
+_pthread_validate_qos_class_and_relpri(qos_class_t qc, int relpri)
+{
+       if (relpri > 0 || relpri < QOS_MIN_RELATIVE_PRIORITY) {
+               return THREAD_QOS_UNSPECIFIED;
        }
+       return _pthread_qos_class_to_thread_qos(qc);
+}
 
-       int ret = EINVAL;
-       if (__attr->sig == _PTHREAD_ATTR_SIG) {
-               if (!__attr->schedset) {
-                       __attr->qosclass = _pthread_priority_make_newest(__qos_class, __relative_priority, 0);
-                       __attr->qosset = 1;
-                       ret = 0;
-               }
-       }
+static inline void
+_pthread_priority_split(pthread_priority_t pp, qos_class_t *qc, int *relpri)
+{
+       thread_qos_t qos = _pthread_priority_thread_qos(pp);
+       if (qc) *qc = _pthread_qos_class_from_thread_qos(qos);
+       if (relpri) *relpri = _pthread_priority_relpri(pp);
+}
 
-       return ret;
+void
+_pthread_set_main_qos(pthread_priority_t qos)
+{
+       _main_qos = (uint32_t)qos;
 }
 
 int
-pthread_attr_get_qos_class_np(pthread_attr_t * __restrict __attr,
-                                                         qos_class_t * __restrict __qos_class,
-                                                         int * __restrict __relative_priority)
+pthread_attr_set_qos_class_np(pthread_attr_t *attr, qos_class_t qc, int relpri)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return ENOTSUP;
+       thread_qos_t qos = _pthread_validate_qos_class_and_relpri(qc, relpri);
+       if (attr->sig != _PTHREAD_ATTR_SIG || attr->schedset) {
+               return EINVAL;
        }
 
-       int ret = EINVAL;
-       if (__attr->sig == _PTHREAD_ATTR_SIG) {
-               if (__attr->qosset) {
-                       qos_class_t qos; int relpri;
-                       _pthread_priority_split_newest(__attr->qosclass, qos, relpri);
+       attr->qosclass = _pthread_priority_make_from_thread_qos(qos, relpri, 0);
+       attr->qosset = 1;
+       attr->schedset = 0;
+       return 0;
+}
 
-                       if (__qos_class) { *__qos_class = qos; }
-                       if (__relative_priority) { *__relative_priority = relpri; }
-               } else {
-                       if (__qos_class) { *__qos_class = 0; }
-                       if (__relative_priority) { *__relative_priority = 0; }
-               }
-               ret = 0;
+int
+pthread_attr_get_qos_class_np(pthread_attr_t *attr, qos_class_t *qc, int *relpri)
+{
+       if (attr->sig != _PTHREAD_ATTR_SIG) {
+               return EINVAL;
        }
 
-       return ret;
+       _pthread_priority_split(attr->qosset ? attr->qosclass : 0, qc, relpri);
+       return 0;
 }
 
 int
-pthread_set_qos_class_self_np(qos_class_t __qos_class,
-                                                         int __relative_priority)
+pthread_set_qos_class_self_np(qos_class_t qc, int relpri)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return ENOTSUP;
-       }
-
-       if (__relative_priority > 0 || __relative_priority < QOS_MIN_RELATIVE_PRIORITY) {
+       thread_qos_t qos = _pthread_validate_qos_class_and_relpri(qc, relpri);
+       if (!qos) {
                return EINVAL;
        }
 
-       pthread_priority_t priority = _pthread_priority_make_newest(__qos_class, __relative_priority, 0);
-
-       if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) {
-               return _pthread_set_properties_self(_PTHREAD_SET_SELF_QOS_FLAG, priority, 0);
-       } else {
-               /* We set the thread QoS class in the TSD and then call into the kernel to
-                * read the value out of it and set the QoS class.
-                */
-               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority);
-               mach_port_t kport = _pthread_kernel_thread(pthread_self());
-               int res = __bsdthread_ctl(BSDTHREAD_CTL_SET_QOS, kport, &pthread_self()->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS], 0);
-
-               if (res == -1) {
-                       res = errno;
-               }
-
-               return res;
-       }
+       pthread_priority_t pp = _pthread_priority_make_from_thread_qos(qos, relpri, 0);
+       return _pthread_set_properties_self(_PTHREAD_SET_SELF_QOS_FLAG, pp, 0);
 }
 
 int
-pthread_set_qos_class_np(pthread_t __pthread,
-                                                qos_class_t __qos_class,
-                                                int __relative_priority)
+pthread_set_qos_class_np(pthread_t thread, qos_class_t qc, int relpri)
 {
-       if (__pthread != pthread_self()) {
+       if (thread != pthread_self()) {
                /* The kext now enforces this anyway; checking here too allows us to call
                 * _pthread_set_properties_self later if we can.
                 */
                return EPERM;
        }
-
-       return pthread_set_qos_class_self_np(__qos_class, __relative_priority);
+       return pthread_set_qos_class_self_np(qc, relpri);
 }
 
 int
-pthread_get_qos_class_np(pthread_t __pthread,
-                                                qos_class_t * __restrict __qos_class,
-                                                int * __restrict __relative_priority)
+pthread_get_qos_class_np(pthread_t thread, qos_class_t *qc, int *relpri)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return ENOTSUP;
-       }
-
-       pthread_priority_t priority;
-
-       if (__pthread == pthread_self()) {
-               priority = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS);
-       } else {
-               priority = __pthread->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS];
-       }
-
-       qos_class_t qos; int relpri;
-       _pthread_priority_split_newest(priority, qos, relpri);
-
-       if (__qos_class) { *__qos_class = qos; }
-       if (__relative_priority) { *__relative_priority = relpri; }
-
+       pthread_priority_t pp = thread->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS];
+       _pthread_priority_split(pp, qc, relpri);
        return 0;
 }
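+
+/*
+ * Usage sketch (illustrative): setting and reading back the current
+ * thread's QoS through the routines above:
+ *
+ *     pthread_set_qos_class_self_np(QOS_CLASS_UTILITY, -4);
+ *     qos_class_t qc; int relpri;
+ *     pthread_get_qos_class_np(pthread_self(), &qc, &relpri);
+ *     // qc == QOS_CLASS_UTILITY, relpri == -4
+ */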
 
 qos_class_t
 qos_class_self(void)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return QOS_CLASS_UNSPECIFIED;
-       }
-
-       pthread_priority_t p = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS);
-       qos_class_t c = _pthread_priority_get_qos_newest(p);
-
-       return c;
+       pthread_priority_t pp;
+       pp = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS);
+       return _pthread_qos_class_from_thread_qos(_pthread_priority_thread_qos(pp));
 }
 
 qos_class_t
 qos_class_main(void)
 {
-       return _pthread_priority_get_qos_newest(_main_qos);
+       pthread_priority_t pp = _main_qos;
+       return _pthread_qos_class_from_thread_qos(_pthread_priority_thread_qos(pp));
 }
 
 pthread_priority_t
-_pthread_qos_class_encode(qos_class_t qos_class, int relative_priority, unsigned long flags)
+_pthread_qos_class_encode(qos_class_t qc, int relpri, unsigned long flags)
 {
-       return _pthread_priority_make_newest(qos_class, relative_priority, flags);
+       thread_qos_t qos = _pthread_qos_class_to_thread_qos(qc);
+       return _pthread_priority_make_from_thread_qos(qos, relpri, flags);
 }
 
 qos_class_t
-_pthread_qos_class_decode(pthread_priority_t priority, int *relative_priority, unsigned long *flags)
+_pthread_qos_class_decode(pthread_priority_t pp, int *relpri, unsigned long *flags)
 {
-       qos_class_t qos; int relpri;
-
-       _pthread_priority_split_newest(priority, qos, relpri);
-
-       if (relative_priority) { *relative_priority = relpri; }
-       if (flags) { *flags = _pthread_priority_get_flags(priority); }
-       return qos;
+       qos_class_t qc;
+       _pthread_priority_split(pp, &qc, relpri);
+       if (flags) *flags = (pp & _PTHREAD_PRIORITY_FLAGS_MASK);
+       return qc;
 }
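+
+/*
+ * Round-trip sketch (illustrative): encode and decode are inverses for
+ * valid classes:
+ *
+ *     pthread_priority_t pp = _pthread_qos_class_encode(QOS_CLASS_BACKGROUND, -2, 0);
+ *     int relpri; unsigned long flags;
+ *     qos_class_t qc = _pthread_qos_class_decode(pp, &relpri, &flags);
+ *     // qc == QOS_CLASS_BACKGROUND, relpri == -2, flags == 0
+ */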
 
 // Encode a legacy workqueue API priority into a pthread_priority_t. This API
@@ -222,35 +193,48 @@ _pthread_qos_class_decode(pthread_priority_t priority, int *relative_priority, u
 pthread_priority_t
 _pthread_qos_class_encode_workqueue(int queue_priority, unsigned long flags)
 {
+       thread_qos_t qos;
        switch (queue_priority) {
-       case WORKQ_HIGH_PRIOQUEUE:
-               return _pthread_priority_make_newest(QOS_CLASS_USER_INITIATED, 0, flags);
-       case WORKQ_DEFAULT_PRIOQUEUE:
-               return _pthread_priority_make_newest(QOS_CLASS_DEFAULT, 0, flags);
-       case WORKQ_LOW_PRIOQUEUE:
+       case WORKQ_HIGH_PRIOQUEUE:      qos = THREAD_QOS_USER_INTERACTIVE; break;
+       case WORKQ_DEFAULT_PRIOQUEUE:   qos = THREAD_QOS_LEGACY; break;
        case WORKQ_NON_INTERACTIVE_PRIOQUEUE:
-               return _pthread_priority_make_newest(QOS_CLASS_UTILITY, 0, flags);
-       case WORKQ_BG_PRIOQUEUE:
-               return _pthread_priority_make_newest(QOS_CLASS_BACKGROUND, 0, flags);
-       /* Legacy dispatch does not use QOS_CLASS_MAINTENANCE, so no need to handle it here */
+       case WORKQ_LOW_PRIOQUEUE:       qos = THREAD_QOS_UTILITY; break;
+       case WORKQ_BG_PRIOQUEUE:        qos = THREAD_QOS_BACKGROUND; break;
        default:
                __pthread_abort();
        }
+       return _pthread_priority_make_from_thread_qos(qos, 0, flags);
 }
 
+#define _PTHREAD_SET_SELF_OUTSIDE_QOS_SKIP \
+               (_PTHREAD_SET_SELF_QOS_FLAG | _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG | \
+                _PTHREAD_SET_SELF_TIMESHARE_FLAG)
+
 int
-_pthread_set_properties_self(_pthread_set_flags_t flags, pthread_priority_t priority, mach_port_t voucher)
+_pthread_set_properties_self(_pthread_set_flags_t flags,
+               pthread_priority_t priority, mach_port_t voucher)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_SETSELF)) {
-               return ENOTSUP;
+       pthread_t self = pthread_self();
+       _pthread_set_flags_t kflags = flags;
+       int rv = 0;
+
+       if (self->wqoutsideqos && (flags & _PTHREAD_SET_SELF_OUTSIDE_QOS_SKIP)) {
+               // A number of properties cannot be altered if we are a workloop
+               // thread that has outside of QoS properties applied to it.
+               kflags &= ~_PTHREAD_SET_SELF_OUTSIDE_QOS_SKIP;
+               if (kflags == 0) goto skip;
        }
 
-       int rv = __bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, priority, voucher, flags);
+       rv = __bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, priority, voucher, kflags);
 
-       /* Set QoS TSD if we succeeded or only failed the voucher half. */
+skip:
+       // Set QoS TSD if we succeeded, or only failed the voucher portion of the
+       // call. Additionally, if we skipped setting QoS because of outside-of-QoS
+       // attributes then we still want to set the TSD in userspace.
        if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
                if (rv == 0 || errno == ENOENT) {
-                       _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority);
+                       _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS,
+                                       priority);
                }
        }
 
@@ -263,37 +247,21 @@ _pthread_set_properties_self(_pthread_set_flags_t flags, pthread_priority_t prio
 int
 pthread_set_fixedpriority_self(void)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return ENOTSUP;
-       }
-
-       if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) {
-               return _pthread_set_properties_self(_PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG, 0, 0);
-       } else {
-               return ENOTSUP;
-       }
+       return _pthread_set_properties_self(_PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG, 0, 0);
 }
 
 int
 pthread_set_timeshare_self(void)
 {
-       if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) {
-               return ENOTSUP;
-       }
-
-       if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) {
-               return _pthread_set_properties_self(_PTHREAD_SET_SELF_TIMESHARE_FLAG, 0, 0);
-       } else {
-               return ENOTSUP;
-       }
+       return _pthread_set_properties_self(_PTHREAD_SET_SELF_TIMESHARE_FLAG, 0, 0);
 }
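+
+/*
+ * Usage sketch (SPI, illustrative): a thread can opt out of timeshare
+ * scheduling around a latency-critical region and back in afterwards:
+ *
+ *     pthread_set_fixedpriority_self();   // BSDTHREAD_CTL_SET_SELF under the hood
+ *     ...latency-critical work...
+ *     pthread_set_timeshare_self();
+ */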
 
-
 pthread_override_t
-pthread_override_qos_class_start_np(pthread_t __pthread,  qos_class_t __qos_class, int __relative_priority)
+pthread_override_qos_class_start_np(pthread_t thread,  qos_class_t qc, int relpri)
 {
        pthread_override_t rv;
        kern_return_t kr;
+       thread_qos_t qos;
        int res = 0;
 
        /* For now, we don't have access to malloc. So we'll have to vm_allocate this, which means the tiny struct is going
@@ -301,23 +269,30 @@ pthread_override_qos_class_start_np(pthread_t __pthread,  qos_class_t __qos_clas
         */
        bool did_malloc = true;
 
+       qos = _pthread_validate_qos_class_and_relpri(qc, relpri);
+       if (qos == THREAD_QOS_UNSPECIFIED) {
+               return (_Nonnull pthread_override_t)NULL;
+       }
+
        mach_vm_address_t vm_addr = malloc(sizeof(struct pthread_override_s));
        if (!vm_addr) {
                vm_addr = vm_page_size;
                did_malloc = false;
 
-               kr = mach_vm_allocate(mach_task_self(), &vm_addr, round_page(sizeof(struct pthread_override_s)), VM_MAKE_TAG(VM_MEMORY_LIBDISPATCH) | VM_FLAGS_ANYWHERE);
+               kr = mach_vm_allocate(mach_task_self(), &vm_addr,
+                               round_page(sizeof(struct pthread_override_s)),
+                               VM_MAKE_TAG(VM_MEMORY_LIBDISPATCH) | VM_FLAGS_ANYWHERE);
                if (kr != KERN_SUCCESS) {
                        errno = ENOMEM;
-                       return (_Nonnull pthread_override_t) NULL;
+                       return (_Nonnull pthread_override_t)NULL;
                }
        }
 
        rv = (pthread_override_t)vm_addr;
        rv->sig = PTHREAD_OVERRIDE_SIGNATURE;
-       rv->pthread = __pthread;
-       rv->kthread = pthread_mach_thread_np(__pthread);
-       rv->priority = _pthread_priority_make_newest(__qos_class, __relative_priority, 0);
+       rv->pthread = thread;
+       rv->kthread = pthread_mach_thread_np(thread);
+       rv->priority = _pthread_priority_make_from_thread_qos(qos, relpri, 0);
        rv->malloced = did_malloc;
 
        /* To ensure that the kernel port that we keep stays valid, we retain it here. */
@@ -342,7 +317,7 @@ pthread_override_qos_class_start_np(pthread_t __pthread,  qos_class_t __qos_clas
                }
                rv = NULL;
        }
-       return (_Nonnull pthread_override_t) rv;
+       return (_Nonnull pthread_override_t)rv;
 }
 
 int
@@ -523,7 +498,11 @@ _pthread_workqueue_parallelism_for_priority(int qos, unsigned long flags)
 int
 pthread_qos_max_parallelism(qos_class_t qos, unsigned long flags)
 {
-       int thread_qos = _pthread_qos_class_to_thread_qos(qos);
+       thread_qos_t thread_qos;
+       if (qos == QOS_CLASS_UNSPECIFIED) {
+               qos = QOS_CLASS_DEFAULT; // <rdar://problem/35080198>
+       }
+       thread_qos = _pthread_qos_class_to_thread_qos(qos);
        if (thread_qos == THREAD_QOS_UNSPECIFIED) {
                errno = EINVAL;
                return -1;
index 761103eca3caaf15d21ada2a89c43a0ba68ac511..22cc6892466976521722773e52b9b389c45dc257 100644 (file)
  * MkLinux
  */
 
+#include "internal.h"
+
+#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288
 /*
  * Machine specific support for thread initialization
  */
 
-#include "internal.h"
-#include <platform/string.h>
+// NOTE: no resolvers, so this file must not contain any atomic operations
 
+PTHREAD_NOEXPORT void _pthread_setup_suspended(pthread_t th, void (*f)(pthread_t), void *sp);
 
 /*
  * Set up the initial state of a MACH thread
  */
 void
-_pthread_setup(pthread_t thread,
+_pthread_setup_suspended(pthread_t thread,
               void (*routine)(pthread_t),
-              void *vsp,
-              int suspended,
-              int needresume)
+              void *vsp)
 {
 #if defined(__i386__)
        i386_thread_state_t state = { };
@@ -71,20 +72,12 @@ _pthread_setup(pthread_t thread,
        x86_thread_state64_t state = { };
        thread_state_flavor_t flavor = x86_THREAD_STATE64;
        mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
-#elif defined(__arm__)
-       arm_thread_state_t state = { };
-       thread_state_flavor_t flavor = ARM_THREAD_STATE;
-       mach_msg_type_number_t count = ARM_THREAD_STATE_COUNT;
 #else
 #error _pthread_setup not defined for this architecture
 #endif
 
-       if (suspended) {
-               (void)thread_get_state(_pthread_kernel_thread(thread),
-                                    flavor,
-                                    (thread_state_t)&state,
-                                    &count);
-       }
+       (void)thread_get_state(_pthread_kernel_thread(thread),
+                       flavor, (thread_state_t)&state, &count);
 
 #if defined(__i386__)
        uintptr_t *sp = vsp;
@@ -110,46 +103,10 @@ _pthread_setup(pthread_t thread,
        state.__rdi = (uintptr_t)thread;        // argument to function
        *--sp = 0;                              // fake return address
        state.__rsp = (uintptr_t)sp;            // set stack pointer
-#elif defined(__arm__)
-       state.__pc = (uintptr_t)routine;
-
-       // Detect switch to thumb mode.
-       if (state.__pc & 1) {
-           state.__pc &= ~1;
-           state.__cpsr |= 0x20; /* PSR_THUMB */
-       }
-
-       state.__sp = (uintptr_t)vsp - C_ARGSAVE_LEN - C_RED_ZONE;
-       state.__r[0] = (uintptr_t)thread;
 #else
-#error _pthread_setup not defined for this architecture
+#error _pthread_setup_suspended not defined for this architecture
 #endif
 
-       if (suspended) {
-               (void)thread_set_state(_pthread_kernel_thread(thread), flavor, (thread_state_t)&state, count);
-               if (needresume) {
-                       (void)thread_resume(_pthread_kernel_thread(thread));
-               }
-       } else {
-               mach_port_t kernel_thread;
-               (void)thread_create_running(mach_task_self(), flavor, (thread_state_t)&state, count, &kernel_thread);
-               _pthread_set_kernel_thread(thread, kernel_thread);
-       }
-}
-
-// pthread_setup initializes large structures to 0, which the compiler turns into a library call to memset. To avoid linking against
-// Libc, provide a simple wrapper that calls through to the libplatform primitives
-
-#undef memset
-__attribute__((visibility("hidden"))) void *
-memset(void *b, int c, size_t len)
-{
-       return _platform_memset(b, c, len);
-}
-
-#undef bzero
-__attribute__((visibility("hidden"))) void
-bzero(void *s, size_t n)
-{
-       _platform_bzero(s, n);
+       (void)thread_set_state(_pthread_kernel_thread(thread), flavor, (thread_state_t)&state, count);
 }
+#endif // !defined(__OPEN_SOURCE__) && TARGET_OS_OSX
index 408b1010c88423b59f8390a82d019f0de27571f3..84e2717439c7292b53be73eb0518e6020ea781d4 100644 (file)
@@ -12,10 +12,13 @@ include $(DEVELOPER_DIR)/AppleInternal/Makefiles/darwintest/Makefile.common
 TARGETS :=
 TARGETS += atfork
 TARGETS += bsdthread_set_self
+TARGETS += stack
+TARGETS += stack_size
 TARGETS += cond
 #TARGETS += cond_hang3
 #TARGETS += cond_stress
 TARGETS += cond_timed
+TARGETS += cond_prepost
 TARGETS += custom_stack
 TARGETS += stack_aslr
 TARGETS += join
@@ -24,6 +27,7 @@ TARGETS += main_stack_custom
 TARGETS += detach
 #TARGETS += maxwidth
 TARGETS += mutex
+TARGETS += mutex_prepost
 TARGETS += mutex_try
 TARGETS += once_cancel
 TARGETS += pthread_attr_setstacksize
@@ -35,6 +39,7 @@ TARGETS += pthread_introspection
 TARGETS += pthread_setspecific
 TARGETS += pthread_threadid_np
 TARGETS += pthread_get_qos_class_np
+TARGETS += pthread_dependency
 #TARGETS += qos
 TARGETS += rdar_32848402
 #TARGETS += rwlock-22244050
@@ -47,6 +52,7 @@ TARGETS += tsd
 #TARGETS += wq_kevent_stress
 TARGETS += wq_limits
 TARGETS += add_timer_termination
+TARGETS += perf_contended_mutex_rwlock
 
 OTHER_LTE_INCLUDE_FILES += \
        /usr/local/lib/libdarwintest_utils.dylib
@@ -54,7 +60,7 @@ OTHER_LTE_INCLUDE_FILES += \
 OTHER_CFLAGS := -DDARWINTEST -Weverything \
                -Wno-vla -Wno-bad-function-cast -Wno-missing-noreturn \
                -Wno-missing-field-initializers -Wno-format-pedantic \
-               -Wno-gnu-folding-constant
+               -Wno-gnu-folding-constant -Wno-used-but-marked-unused
 OTHER_LDFLAGS := -ldarwintest_utils
 
 #TARGETS += main_stack_legacy // Disabled by default due to linker warnings
@@ -63,8 +69,8 @@ OTHER_LDFLAGS := -ldarwintest_utils
 #main_stack_legacy: ARCH_FLAGS = -arch i386
 #main_stack_legacy: DEPLOYMENT_TARGET_FLAGS = -mmacosx-version-min=10.7
 
-main_stack_custom: OTHER_LDFLAGS += -Wl,-stack_size,0x14000
-main_stack_custom: OTHER_CFLAGS += -DSTACKSIZE=0x14000
+main_stack_custom: OTHER_LDFLAGS += -Wl,-stack_size,0x124000
+main_stack_custom: OTHER_CFLAGS += -DSTACKSIZE=0x124000
 
 bsdthread_set_self: OTHER_CFLAGS += -D_DARWIN_FEATURE_CLOCK_GETTIME
 
diff --git a/tests/cond_prepost.c b/tests/cond_prepost.c
new file mode 100644 (file)
index 0000000..df8b86e
--- /dev/null
@@ -0,0 +1,217 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <TargetConditionals.h>
+
+#include <pthread/pthread_spis.h>
+
+#include <sys/sysctl.h>
+
+#include "darwintest_defaults.h"
+#include <darwintest_multiprocess.h>
+
+// <rdar://problem/38810583> this test case is intended to test for the
+// specific issue found in this radar. That is, if:
+//
+//     1. A mutex is in first-fit policy mode, and
+//     2. is used as the mutex in a pthread_cond_wait (or timedwait), and
+//     3. the mutex has the K-bit set but has no kernel waiters, and
+//     4. the cvwait call preposts an unlock to the mutex
+//
+//  Under these conditions, the fact that the cvwait preposted an unlock to
+//  the paired mutex is lost during the call. The P-bit was never returned to
+//  userspace and the kwq in the kernel would continue to exist. If the same
+//  uaddr is then reused as another synchroniser type, we would often
+//  return EINVAL from the wait/lock function.
+//
+//  So this test is attempting to:
+//
+//     1. Repeatedly bang on a mutex+cvar for a number of iterations in the
+//        hope of triggering a cvwait prepost situation.
+//     2. Then destroy both the mutex and cvar, and reinitialise each memory
+//        location as the opposite type of synchroniser. Then cvwait once to
+//        trigger the failure condition.
+
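+// The anonymous unions below overlay a mutex with a condvar so that phase 2
+// can reinitialise each address as the opposite synchroniser type, per the
+// scenario described above.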
+struct context {
+       union {
+               pthread_mutex_t mutex;
+               pthread_cond_t cond;
+       };
+       union {
+               pthread_mutex_t mutex2;
+               pthread_cond_t cond2;
+       };
+       long value;
+       long count;
+       long waiter;
+};
+
+static void *test_cond(void *ptr) {
+       struct context *context = ptr;
+       int res;
+
+       res = pthread_cond_wait(&context->cond, &context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "condition wait on condvar completed");
+       res = pthread_mutex_unlock(&context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "unlock condvar mutex");
+       return NULL;
+}
+
+static void *test_cond_wake(void *ptr) {
+       struct context *context = ptr;
+       int res;
+
+       res = pthread_mutex_lock(&context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "locked condvar mutex");
+       res = pthread_cond_signal(&context->cond);
+       T_ASSERT_POSIX_ZERO(res, "condvar signalled");
+       res = pthread_mutex_unlock(&context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "dropped condvar mutex");
+
+       return NULL;
+}
+
+static void *test_thread(void *ptr) {
+       int res;
+       long old;
+       struct context *context = ptr;
+
+       int i = 0;
+       char *str;
+
+       do {
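+       // Alternate mutex_lock and mutex_trylock acquisitions; in alternating
+       // blocks of 16 iterations, a thread that finds no waiter parks in a
+       // short cvar timedwait, so the other threads' unlocks can race the
+       // cvwait and (hopefully) prepost to the kernel.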
+               bool try = i++ & 1;
+               bool cond = i & 16;
+
+               if (!try) {
+                       str = "pthread_mutex_lock";
+                       res = pthread_mutex_lock(&context->mutex);
+               } else {
+                       str = "pthread_mutex_trylock";
+                       res = pthread_mutex_trylock(&context->mutex);
+               }
+               if (res != 0) {
+                       if (try && res == EBUSY) {
+                               continue;
+                       }
+                       T_ASSERT_POSIX_ZERO(res, "[%ld] %s", context->count, str);
+               }
+
+               old = __sync_fetch_and_or(&context->value, 1);
+               if ((old & 1) != 0) {
+                       T_FAIL("[%ld] OR %lx\n", context->count, old);
+               }
+
+               old = __sync_fetch_and_and(&context->value, 0);
+               if ((old & 1) == 0) {
+                       T_FAIL("[%ld] AND %lx\n", context->count, old);
+               }
+
+               if (cond && !context->waiter) {
+                       context->waiter = 1;
+                       struct timespec ts = {
+                               .tv_sec = 0,
+                               .tv_nsec = 10ull * NSEC_PER_MSEC,
+                       };
+
+                       res = pthread_cond_timedwait_relative_np(&context->cond2, &context->mutex, &ts);
+                       if (res == ETIMEDOUT) {
+                               // ignore, should be the last thread out
+                       } else if (res) {
+                               T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_cond_wait",
+                                               context->count);
+                       }
+                       context->waiter = 0;
+                       res = pthread_mutex_unlock(&context->mutex);
+                       if (res) {
+                               T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_mutex_unlock",
+                                               context->count);
+                       }
+               } else {
+                       if (context->waiter) {
+                               res = pthread_cond_broadcast(&context->cond2);
+                               if (res) {
+                                       T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_cond_broadcast",
+                                                       context->count);
+                               }
+                       }
+                       res = pthread_mutex_unlock(&context->mutex);
+                       if (res) {
+                               T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_mutex_unlock",
+                                               context->count);
+                       }
+               }
+       } while (__sync_fetch_and_sub(&context->count, 1) > 0);
+       return NULL;
+}
+
+
+static void
+_test_condvar_prepost_race(void)
+{
+       struct context context = {
+               .mutex = PTHREAD_MUTEX_INITIALIZER,
+               .cond2 = PTHREAD_COND_INITIALIZER,
+               .value = 0,
+               .count = 10000,
+               .waiter = false,
+       };
+       int i;
+       int res;
+       int threads = 8;
+       pthread_t p[threads];
+       for (i = 0; i < threads; ++i) {
+               res = pthread_create(&p[i], NULL, test_thread, &context);
+               T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()");
+       }
+       for (i = 0; i < threads; ++i) {
+               res = pthread_join(p[i], NULL);
+               T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()");
+       }
+
+       T_PASS("initial pthread mutex storm completed");
+
+       pthread_mutex_destroy(&context.mutex);
+       pthread_cond_destroy(&context.cond2);
+
+       pthread_mutex_init(&context.mutex2, NULL);
+       pthread_cond_init(&context.cond, NULL);
+       res = pthread_mutex_lock(&context.mutex2);
+       T_ASSERT_POSIX_ZERO(res, "mutex lock for condition wait");
+       res = pthread_create(&p[0], NULL, test_cond, &context);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()");
+       res = pthread_create(&p[1], NULL, test_cond_wake, &context);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()");
+
+       res = pthread_join(p[0], NULL);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()");
+       res = pthread_join(p[1], NULL);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()");
+
+       pthread_cond_destroy(&context.cond);
+}
+
+T_DECL(cond_prepost_fairshare, "cond_prepost_fairshare (fairshare)",
+       T_META_ALL_VALID_ARCHS(YES),
+       T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=1"))
+{
+       int i;
+       int count = 100;
+       for (i = 0; i < count; i++) {
+               _test_condvar_prepost_race();
+       }
+}
+
+T_DECL(cond_prepost_firstfit, "cond_prepost_firstfit (firstfit)",
+       T_META_ALL_VALID_ARCHS(YES),
+       T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=3"))
+{
+       int i;
+       int count = 100;
+       for (i = 0; i < count; i++) {
+               _test_condvar_prepost_race();
+       }
+}
index eb0d660114fe8806fc4015f86027de420afdabcd..2e992a8874c18a4d8aa11c1a7770727e043a690d 100644 (file)
@@ -14,7 +14,7 @@ T_DECL(main_stack_custom, "tests the reported values for a custom main thread st
 
        struct rlimit lim;
        T_QUIET; T_ASSERT_POSIX_SUCCESS(getrlimit(RLIMIT_STACK, &lim), NULL);
-       lim.rlim_cur = lim.rlim_cur / 8;
+       lim.rlim_cur = lim.rlim_cur + 32 * PAGE_SIZE;
        T_EXPECT_EQ(setrlimit(RLIMIT_STACK, &lim), -1, "setrlimit for stack should fail with custom stack");
        T_EXPECT_EQ((size_t)STACKSIZE, pthread_get_stacksize_np(pthread_self()), "reported stacksize shouldn't change");
 }
index 0b1e1d47444e47d3c380b10466cc9f34b7f93e24..9fe02774e988bf6941aed668aa033fc72e141a75 100644 (file)
@@ -4,6 +4,7 @@
 #include <unistd.h>
 #include <stdbool.h>
 #include <errno.h>
+#include <TargetConditionals.h>
 
 #include <pthread/pthread_spis.h>
 
@@ -104,7 +105,7 @@ check_process_default_mutex_policy(int expected_policy)
 T_DECL(mutex_default_policy,
                "Tests that the default mutex policy is fairshare")
 {
-       check_process_default_mutex_policy(_PTHREAD_MUTEX_POLICY_FAIRSHARE);
+       check_process_default_mutex_policy(_PTHREAD_MUTEX_POLICY_FIRSTFIT);
 }
 
 T_DECL(mutex_default_policy_sysctl,
@@ -133,7 +134,7 @@ T_HELPER_DECL(mutex_default_policy_sysctl_helper, "sysctl helper")
 
 T_DECL(mutex_default_policy_envvar,
                "Tests that setting the policy environment variable changes the default policy",
-               T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=2"))
+               T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=3"))
 {
        check_process_default_mutex_policy(_PTHREAD_MUTEX_POLICY_FIRSTFIT);
 }
diff --git a/tests/mutex_prepost.c b/tests/mutex_prepost.c
new file mode 100644 (file)
index 0000000..6423e20
--- /dev/null
@@ -0,0 +1,157 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <TargetConditionals.h>
+
+#include <pthread/pthread_spis.h>
+
+#include <sys/sysctl.h>
+
+#include "darwintest_defaults.h"
+#include <darwintest_multiprocess.h>
+
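+// context.mutex shares storage with a condvar: after the mutex storm it is
+// destroyed and the same address is reinitialised as context.cond, mirroring
+// the uaddr-reuse scenario described in cond_prepost.c.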
+struct context {
+       union {
+               pthread_mutex_t mutex;
+               pthread_cond_t cond;
+       };
+       pthread_mutex_t mutex2;
+       long value;
+       long count;
+};
+
+static void *test_cond(void *ptr) {
+       struct context *context = ptr;
+       int res;
+
+       res = pthread_cond_wait(&context->cond, &context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "condition wait on condvar completed");
+       res = pthread_mutex_unlock(&context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "unlock condvar mutex");
+       return NULL;
+}
+
+static void *test_cond_wake(void *ptr) {
+       struct context *context = ptr;
+       int res;
+
+       res = pthread_mutex_lock(&context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "locked condvar mutex");
+       res = pthread_cond_signal(&context->cond);
+       T_ASSERT_POSIX_ZERO(res, "condvar signalled");
+       res = pthread_mutex_unlock(&context->mutex2);
+       T_ASSERT_POSIX_ZERO(res, "dropped condvar mutex");
+
+       return NULL;
+}
+
+static void *test_thread(void *ptr) {
+       int res;
+       long old;
+       struct context *context = ptr;
+
+       int i = 0;
+       char *str;
+
+       do {
+               bool try = i++ & 1;
+
+               if (!try) {
+                       str = "pthread_mutex_lock";
+                       res = pthread_mutex_lock(&context->mutex);
+               } else {
+                       str = "pthread_mutex_trylock";
+                       res = pthread_mutex_trylock(&context->mutex);
+               }
+               if (res != 0) {
+                       if (try && res == EBUSY) {
+                               continue;
+                       }
+                       T_ASSERT_POSIX_ZERO(res, "[%ld] %s", context->count, str);
+               }
+
+               old = __sync_fetch_and_or(&context->value, 1);
+               if ((old & 1) != 0) {
+                       T_FAIL("[%ld] OR %lx\n", context->count, old);
+               }
+
+               old = __sync_fetch_and_and(&context->value, 0);
+               if ((old & 1) == 0) {
+                       T_FAIL("[%ld] AND %lx\n", context->count, old);
+               }
+
+               res = pthread_mutex_unlock(&context->mutex);
+               if (res) {
+                       T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_mutex_unlock", context->count);
+               }
+       } while (__sync_fetch_and_sub(&context->count, 1) > 0);
+       return NULL;
+}
+
+
+static void
+_test_condvar_prepost_race(void)
+{
+       struct context context = {
+               .mutex = PTHREAD_MUTEX_INITIALIZER,
+               .mutex2 = PTHREAD_MUTEX_INITIALIZER,
+               .value = 0,
+               .count = 1000,
+       };
+       int i;
+       int res;
+       int threads = 8;
+       pthread_t p[threads];
+       for (i = 0; i < threads; ++i) {
+               res = pthread_create(&p[i], NULL, test_thread, &context);
+               T_ASSERT_POSIX_ZERO(res, "pthread_create()");
+       }
+       for (i = 0; i < threads; ++i) {
+               res = pthread_join(p[i], NULL);
+               T_ASSERT_POSIX_ZERO(res, "pthread_join()");
+       }
+
+       T_PASS("initial pthread mutex storm completed");
+
+       pthread_mutex_destroy(&context.mutex);
+
+       pthread_cond_init(&context.cond, NULL);
+       res = pthread_mutex_lock(&context.mutex2);
+       T_ASSERT_POSIX_ZERO(res, "mutex lock for condition wait");
+       res = pthread_create(&p[0], NULL, test_cond, &context);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()");
+       res = pthread_create(&p[1], NULL, test_cond_wake, &context);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()");
+
+       res = pthread_join(p[0], NULL);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()");
+       res = pthread_join(p[1], NULL);
+       T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()");
+
+       pthread_cond_destroy(&context.cond);
+}
+
+T_DECL(mutex_prepost_fairshare, "pthread_mutex_prepost (fairshare)",
+       T_META_ALL_VALID_ARCHS(YES),
+       T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=1"))
+{
+       int i;
+       int count = 100;
+       for (i = 0; i < count; i++) {
+               _test_condvar_prepost_race();
+       }
+}
+
+T_DECL(mutex_prepost_firstfit, "pthread_mutex_prepost (firstfit)",
+       T_META_ALL_VALID_ARCHS(YES),
+       T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=3"))
+{
+       int i;
+       int count = 100;
+       for (i = 0; i < count; i++) {
+               _test_condvar_prepost_race();
+       }
+}
diff --git a/tests/perf_contended_mutex_rwlock.c b/tests/perf_contended_mutex_rwlock.c
new file mode 100644 (file)
index 0000000..e4219c5
--- /dev/null
@@ -0,0 +1,519 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdatomic.h>
+#include <math.h>
+#include <unistd.h>
+#include <sys/sysctl.h>
+#include <mach/mach.h>
+#include <pthread.h>
+#include <pthread/pthread_spis.h>
+#include <os/lock.h>
+#include <darwintest.h>
+
+// number of times the lock is taken per dt_stat batch
+#define ITERATIONS_PER_DT_STAT_BATCH 10000ull
+// number of times the contended mutex is taken per dt_stat batch
+#define ITERATIONS_PER_DT_STAT_BATCH_CONTENDED_MUTEX 1000ull
+// shift determining power of 2 factor of time spent by worker threads in the
+// busy() function while outside of the lock vs inside the lock
+#define OUTER_VS_INNER_SHIFT 4
+// fraction of read lock vs write lock acquires
+#define RDLOCK_FRACTION 0.99f
+// maintain and print progress counters in between measurement batches
+#define COUNTERS 0
+
+// move the darwintest assertion code out of the straight line execution path
+// since it has non-trivial overhead and codegen impact even if the assertion
+// is never triggered.
+#define iferr(_e) if(__builtin_expect(!!(_e), 0))
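+// e.g.: iferr (r) { T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_lock"); }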
+
+#pragma mark -
+
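+// Picks pseudo-random busy-work iteration counts for each pass: inner (work
+// done while holding the lock) lands in [4, 19]; outer is scaled up by
+// 2^OUTER_VS_INNER_SHIFT so threads spend most of their time outside the
+// critical section. The raw random value is returned so callers can reuse it
+// (e.g. for the rdlock/wrlock split below).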
+uint64_t
+random_busy_counts(unsigned int *seed, uint64_t *inner, uint64_t *outer)
+{
+       uint64_t random = rand_r(seed);
+       const uint64_t of = (1 << OUTER_VS_INNER_SHIFT);
+       *inner = 0x4 + (random & (0x10 - 1));
+       *outer = 0x4 * of + ((random >> 4) & (0x10 * of - 1));
+       return random;
+}
+
+// By default busy() does cpu busy work for a passed in number of iterations
+enum {
+       busy_is_nothing = 0,
+       busy_is_cpu_busy,
+       busy_is_cpu_yield,
+};
+static int busy_select = busy_is_cpu_busy;
+
+static double
+cpu_busy(uint64_t n)
+{
+       double d = M_PI;
+       uint64_t i;
+       for (i = 0; i < n; i++) d *= M_PI;
+       return d;
+}
+
+static double
+cpu_yield(uint64_t n)
+{
+       uint64_t i;
+       for (i = 0; i < n; i++) {
+#if defined(__arm__) || defined(__arm64__)
+       asm volatile("yield");
+#elif defined(__x86_64__) || defined(__i386__)
+       asm volatile("pause");
+#else
+#error Unrecognized architecture
+#endif
+       }
+       return 0;
+}
+
+__attribute__((noinline))
+static double
+busy(uint64_t n)
+{
+       switch (busy_select) {
+       case busy_is_cpu_busy:
+               return cpu_busy(n);
+       case busy_is_cpu_yield:
+               return cpu_yield(n);
+       default:
+               return 0;
+       }
+}
+
+#pragma mark -
+
+static semaphore_t ready_sem, start_sem, end_sem;
+static uint32_t nthreads;
+static _Atomic uint32_t active_thr;
+static _Atomic int64_t todo;
+uint64_t iterations_per_dt_stat_batch = ITERATIONS_PER_DT_STAT_BATCH;
+
+#if COUNTERS
+static _Atomic uint64_t total_locks, total_rdlocks, total_wrlocks;
+#define ctr_inc(_t) atomic_fetch_add_explicit(&(_t), 1, memory_order_relaxed)
+#else
+#define ctr_inc(_t)
+#endif
+
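+// Default the worker count to MIN(active, physical) CPUs, presumably so that
+// hyperthread siblings sharing a core don't distort the contended numbers.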
+static uint32_t
+ncpu(void)
+{
+       static uint32_t activecpu, physicalcpu;
+       if (!activecpu) {
+               uint32_t n;
+               size_t s = sizeof(n);
+               sysctlbyname("hw.activecpu", &n, &s, NULL, 0);
+               activecpu = n;
+               s = sizeof(n);
+               sysctlbyname("hw.physicalcpu", &n, &s, NULL, 0);
+               physicalcpu = n;
+       }
+       return MIN(activecpu, physicalcpu);
+}
+
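+// Runs one timed batch: wait for every worker to check in on ready_sem, arm
+// the shared work counter, then time the window from the start_sem broadcast
+// until the last worker out signals end_sem.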
+__attribute__((noinline))
+static void
+threaded_bench(dt_stat_time_t s, int batch_size)
+{
+       kern_return_t kr;
+       for (int i = 0; i < nthreads; i++) {
+               kr = semaphore_wait(ready_sem);
+               iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");}
+       }
+       atomic_init(&active_thr, nthreads);
+       atomic_init(&todo, batch_size * iterations_per_dt_stat_batch);
+       dt_stat_token t = dt_stat_begin(s);
+       kr = semaphore_signal_all(start_sem);
+       iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal_all");}
+       kr = semaphore_wait(end_sem);
+       iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");}
+       dt_stat_end_batch(s, batch_size, t);
+}
+
+static void
+setup_threaded_bench(void* (*thread_fn)(void*), bool singlethreaded)
+{
+       kern_return_t kr;
+       int r;
+       char *e;
+
+       if (singlethreaded) {
+               nthreads = 1;
+       } else {
+               if ((e = getenv("DT_STAT_NTHREADS"))) nthreads = strtoul(e, NULL, 0);
+               if (nthreads < 2) nthreads = ncpu();
+       }
+       if ((e = getenv("DT_STAT_CPU_BUSY"))) busy_select = strtoul(e, NULL, 0);
+
+       kr = semaphore_create(mach_task_self(), &ready_sem, SYNC_POLICY_FIFO, 0);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
+       kr = semaphore_create(mach_task_self(), &start_sem, SYNC_POLICY_FIFO, 0);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
+       kr = semaphore_create(mach_task_self(), &end_sem, SYNC_POLICY_FIFO, 0);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
+
+       pthread_attr_t attr;
+       r = pthread_attr_init(&attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_attr_init");
+       r = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_attr_setdetachstate");
+
+       for (int i = 0; i < nthreads; i++) {
+               pthread_t th;
+               r = pthread_create(&th, &attr, thread_fn, (void *)(uintptr_t)(i+1));
+               T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_create");
+       }
+}
+
+#pragma mark -
+
+static pthread_mutex_t mutex;
+
+static void *
+mutex_bench_thread(void * arg)
+{
+       kern_return_t kr;
+       int r;
+       unsigned int seed;
+       volatile double dummy;
+
+restart:
+       seed = (uintptr_t)arg; // each thread repeats its own sequence
+       kr = semaphore_wait_signal(start_sem, ready_sem);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+
+       while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
+               uint64_t inner, outer;
+               random_busy_counts(&seed, &inner, &outer);
+               dummy = busy(outer);
+               r = pthread_mutex_lock(&mutex);
+               iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_lock");}
+               dummy = busy(inner);
+               r = pthread_mutex_unlock(&mutex);
+               iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_unlock");}
+               ctr_inc(total_locks);
+       }
+
+       if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) {
+               kr = semaphore_signal(end_sem);
+               T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal");
+       }
+       goto restart;
+}
+
+static void
+mutex_bench(bool singlethreaded)
+{
+       int r;
+       int batch_size;
+#if COUNTERS
+       uint64_t batch = 0;
+#endif
+
+       setup_threaded_bench(mutex_bench_thread, singlethreaded);
+
+       pthread_mutexattr_t attr;
+       r = pthread_mutexattr_init(&attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_init");
+       r = pthread_mutexattr_setpolicy_np(&attr, _PTHREAD_MUTEX_POLICY_FAIRSHARE);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_setpolicy_np");
+       r = pthread_mutex_init(&mutex, &attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_init");
+
+       dt_stat_time_t s = dt_stat_time_create("%llu pthread_mutex_lock & "
+                       "pthread_mutex_unlock (fairshare) on %u thread%s",
+                       iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? "s" : "");
+       do {
+               batch_size = dt_stat_batch_size(s);
+               threaded_bench(s, batch_size);
+#if COUNTERS
+               fprintf(stderr, "\rbatch: %4llu\t size: %4d\tmutexes: %8llu",
+                               ++batch, batch_size,
+                               atomic_load_explicit(&total_locks, memory_order_relaxed));
+#endif
+       } while (!dt_stat_stable(s));
+#if COUNTERS
+       fprintf(stderr, "\n");
+#endif
+       dt_stat_finalize(s);
+}
+
+T_DECL(perf_uncontended_mutex_bench, "Uncontended fairshare mutex",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       mutex_bench(true);
+}
+
+T_DECL(perf_contended_mutex_bench, "Contended fairshare mutex",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       iterations_per_dt_stat_batch = ITERATIONS_PER_DT_STAT_BATCH_CONTENDED_MUTEX;
+       mutex_bench(false);
+}
+
+#pragma mark -
+
+static pthread_rwlock_t rwlock;
+
+static void *
+rwlock_bench_thread(void * arg)
+{
+       kern_return_t kr;
+       int r;
+       unsigned int seed;
+       volatile double dummy;
+       const uint64_t rand_rdlock_max = (double)RAND_MAX * RDLOCK_FRACTION;
+
+restart:
+       seed = (uintptr_t)arg; // each thread repeats its own sequence
+       kr = semaphore_wait_signal(start_sem, ready_sem);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+
+       while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
+               uint64_t inner, outer;
+               uint64_t random = random_busy_counts(&seed, &inner, &outer);
+               dummy = busy(outer);
+               if (random < rand_rdlock_max) {
+                       r = pthread_rwlock_rdlock(&rwlock);
+                       iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_rdlock");}
+                       dummy = busy(inner);
+                       r = pthread_rwlock_unlock(&rwlock);
+                       iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_unlock");}
+                       ctr_inc(total_rdlocks);
+               } else {
+                       r = pthread_rwlock_wrlock(&rwlock);
+                       iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_wrlock");}
+                       dummy = busy(inner);
+                       r = pthread_rwlock_unlock(&rwlock);
+                       iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_unlock");}
+                       ctr_inc(total_wrlocks);
+               }
+               ctr_inc(total_locks);
+       }
+
+       if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) {
+               kr = semaphore_signal(end_sem);
+               T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal");
+       }
+       goto restart;
+}
+
+static void
+rwlock_bench(bool singlethreaded)
+{
+       int r;
+       int batch_size;
+#if COUNTERS
+       uint64_t batch = 0;
+#endif
+
+       setup_threaded_bench(rwlock_bench_thread, singlethreaded);
+
+       r = pthread_rwlock_init(&rwlock, NULL);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_init");
+
+       dt_stat_time_t s = dt_stat_time_create("%llu pthread_rwlock_rd/wrlock & "
+                       "pthread_rwlock_unlock (%.0f%% rdlock) on %u thread%s",
+                       iterations_per_dt_stat_batch, RDLOCK_FRACTION * 100, nthreads,
+                       nthreads > 1 ? "s" : "");
+       do {
+               batch_size = dt_stat_batch_size(s);
+               threaded_bench(s, batch_size);
+#if COUNTERS
+               fprintf(stderr, "\rbatch: %4llu\t size: %4d\trwlocks: %8llu\t"
+                               "rd: %8llu\twr: %8llu", ++batch, batch_size,
+                               atomic_load_explicit(&total_locks,   memory_order_relaxed),
+                               atomic_load_explicit(&total_rdlocks, memory_order_relaxed),
+                               atomic_load_explicit(&total_wrlocks, memory_order_relaxed));
+#endif
+       } while (!dt_stat_stable(s));
+#if COUNTERS
+       fprintf(stderr, "\n");
+#endif
+       dt_stat_finalize(s);
+}
+
+T_DECL(perf_uncontended_rwlock_bench, "Uncontended rwlock",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       rwlock_bench(true);
+}
+
+T_DECL(perf_contended_rwlock_bench, "Contended rwlock",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       rwlock_bench(false);
+}
+
+#pragma mark -
+
+static os_unfair_lock unfair_lock;
+
+static void *
+unfair_lock_bench_thread(void * arg)
+{
+       kern_return_t kr;
+       unsigned int seed;
+       volatile double dummy;
+
+restart:
+       seed = (uintptr_t)arg; // each thread repeats its own sequence
+       kr = semaphore_wait_signal(start_sem, ready_sem);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+
+       while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
+               uint64_t inner, outer;
+               random_busy_counts(&seed, &inner, &outer);
+               dummy = busy(outer);
+               os_unfair_lock_lock(&unfair_lock);
+               dummy = busy(inner);
+               os_unfair_lock_unlock(&unfair_lock);
+               ctr_inc(total_locks);
+       }
+
+       if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) {
+               kr = semaphore_signal(end_sem);
+               T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal");
+       }
+       goto restart;
+}
+
+static void
+unfair_lock_bench(bool singlethreaded)
+{
+       int r;
+       int batch_size;
+#if COUNTERS
+       uint64_t batch = 0;
+#endif
+
+       setup_threaded_bench(unfair_lock_bench_thread, singlethreaded);
+
+       dt_stat_time_t s = dt_stat_time_create("%llu os_unfair_lock_lock & "
+                       "os_unfair_lock_unlock on %u thread%s",
+                       iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? "s" : "");
+       do {
+               batch_size = dt_stat_batch_size(s);
+               threaded_bench(s, batch_size);
+#if COUNTERS
+               fprintf(stderr, "\rbatch: %4llu\t size: %4d\tunfair_locks: %8llu",
+                               ++batch, batch_size,
+                               atomic_load_explicit(&total_locks, memory_order_relaxed));
+#endif
+       } while (!dt_stat_stable(s));
+#if COUNTERS
+       fprintf(stderr, "\n");
+#endif
+       dt_stat_finalize(s);
+}
+
+T_DECL(perf_uncontended_unfair_lock_bench, "Uncontended unfair lock",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       unfair_lock_bench(true);
+}
+
+T_DECL(perf_contended_unfair_lock_bench, "Contended unfair lock",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       unfair_lock_bench(false);
+}
+
+#pragma mark -
+
+static pthread_mutex_t ffmutex;
+
+static void *
+ffmutex_bench_thread(void * arg)
+{
+       kern_return_t kr;
+       int r;
+       unsigned int seed;
+       volatile double dummy;
+
+restart:
+       seed = (uintptr_t)arg; // each thread repeats its own sequence
+       kr = semaphore_wait_signal(start_sem, ready_sem);
+       T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
+
+       while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
+               uint64_t inner, outer;
+               random_busy_counts(&seed, &inner, &outer);
+               dummy = busy(outer);
+               r = pthread_mutex_lock(&ffmutex);
+               iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_lock");}
+               dummy = busy(inner);
+               r = pthread_mutex_unlock(&ffmutex);
+               iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_unlock");}
+               ctr_inc(total_locks);
+       }
+
+       if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) {
+               kr = semaphore_signal(end_sem);
+               T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal");
+       }
+       goto restart;
+}
+
+static void
+ffmutex_bench(bool singlethreaded)
+{
+       int r;
+       int batch_size;
+#if COUNTERS
+       uint64_t batch = 0;
+#endif
+
+       setup_threaded_bench(ffmutex_bench_thread, singlethreaded);
+
+       pthread_mutexattr_t attr;
+       r = pthread_mutexattr_init(&attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_init");
+       r = pthread_mutexattr_setpolicy_np(&attr, _PTHREAD_MUTEX_POLICY_FIRSTFIT);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_setpolicy_np");
+       r = pthread_mutex_init(&ffmutex, &attr);
+       T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_init");
+
+       dt_stat_time_t s = dt_stat_time_create("%llu pthread_mutex_lock & "
+                       "pthread_mutex_unlock (first-fit) on %u thread%s",
+                       iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? "s" : "");
+       do {
+               batch_size = dt_stat_batch_size(s);
+               threaded_bench(s, batch_size);
+#if COUNTERS
+               fprintf(stderr, "\rbatch: %4llu\t size: %4d\tffmutexes: %8llu",
+                               ++batch, batch_size,
+                               atomic_load_explicit(&total_locks, memory_order_relaxed));
+#endif
+       } while (!dt_stat_stable(s));
+#if COUNTERS
+       fprintf(stderr, "\n");
+#endif
+       dt_stat_finalize(s);
+}
+
+T_DECL(perf_uncontended_ffmutex_bench, "Uncontended first-fit mutex",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       ffmutex_bench(true);
+}
+
+T_DECL(perf_contended_ffmutex_bench, "Contended first-fit mutex",
+               T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
+               T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
+{
+       ffmutex_bench(false);
+}
diff --git a/tests/pthread_dependency.c b/tests/pthread_dependency.c
new file mode 100644 (file)
index 0000000..a6fd316
--- /dev/null
@@ -0,0 +1,78 @@
+#include "darwintest_defaults.h"
+#include <darwintest_utils.h>
+#include <pthread/dependency_private.h>
+
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+
+static struct job {
+       pthread_dependency_t *req;
+       useconds_t usleep;
+       int done;
+} job;
+
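+// Worker loop: sleep on the condvar until a dependency is posted, optionally
+// delay, then fulfill it, passing the dependency's own address through as the
+// value the waiter expects to observe.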
+static void *
+do_test(void *__unused arg)
+{
+       pthread_mutex_lock(&mutex);
+
+       while (!job.done) {
+               while (job.req == 0) {
+                       pthread_cond_wait(&cond, &mutex);
+               }
+               if (job.usleep) usleep(job.usleep);
+               pthread_dependency_fulfill_np(job.req, job.req);
+               job.req = NULL;
+       }
+
+       pthread_mutex_unlock(&mutex);
+       return NULL;
+}
+
+static void
+post_req(pthread_dependency_t *req, useconds_t delay, bool done)
+{
+       pthread_mutex_lock(&mutex);
+       job.req = req;
+       job.usleep = delay;
+       job.done = done;
+       pthread_cond_signal(&cond);
+       pthread_mutex_unlock(&mutex);
+}
+
+T_DECL(dependency, "dependency", T_META_ALL_VALID_ARCHS(YES))
+{
+       pthread_dependency_t req;
+       pthread_t pth;
+       void *v;
+       int ret;
+
+       T_ASSERT_POSIX_ZERO(pthread_create(&pth, NULL, do_test, NULL), NULL);
+
+       T_LOG("Waiting on a pdependency that takes some time");
+
+       pthread_dependency_init_np(&req, pth, NULL);
+       post_req(&req, 100000, false);
+       v = pthread_dependency_wait_np(&req);
+       T_EXPECT_EQ(v, &req, "pthread_dependency_wait worked");
+
+       T_LOG("Waiting on a pdependency that is already fulfilled");
+
+       pthread_dependency_init_np(&req, pth, NULL);
+       post_req(&req, 0, false);
+       usleep(100000);
+       v = pthread_dependency_wait_np(&req);
+       T_EXPECT_EQ(v, &req, "pthread_dependency_wait worked");
+
+       T_LOG("Waiting on a fulfilled pdependency with the other thread exiting");
+
+       pthread_dependency_init_np(&req, pth, NULL);
+       post_req(&req, 0, true);
+       ret = pthread_join(pth, NULL);
+       T_EXPECT_POSIX_ZERO(ret, "pthread_join");
+
+       v = pthread_dependency_wait_np(&req);
+       T_EXPECT_EQ(v, &req, "pthread_dependency_wait worked");
+
+       T_END;
+}
index d28ca65e7e52d6257ad704046b6c886b7aa2a90c..19cfc254f5f78c8d0df6a4cb72c79f8ea5bab71a 100644 (file)
@@ -9,7 +9,7 @@ extern __uint64_t __thread_selfid( void );
 static void *do_test(void * __unused arg)
 {
        uint64_t threadid = __thread_selfid();
-       T_ASSERT_NOTNULL(threadid, NULL);
+       T_ASSERT_NE(threadid, (uint64_t)0, "__thread_selfid()");
 
        uint64_t pth_threadid = 0;
        T_ASSERT_POSIX_ZERO(pthread_threadid_np(NULL, &pth_threadid), NULL);
index 65cd56e5395b34dc455b18a7922ecf21dbc4bdb5..068836a7695e96276a635eefd5959d322f28112e 100644 (file)
@@ -72,7 +72,9 @@ T_DECL(thread_request_32848402, "repro for rdar://32848402")
        end_spin = clock_gettime_nsec_np(CLOCK_MONOTONIC) + 2 * NSEC_PER_SEC;
 
        dispatch_async_f(a, (void *)0, spin_and_pause);
-       for (long i = 1; i < get_ncpu(); i++) {
+       long n_threads = MIN((long)get_ncpu(),
+                       pthread_qos_max_parallelism(QOS_CLASS_BACKGROUND, 0));
+       for (long i = 1; i < n_threads; i++) {
                dispatch_async_f(b, (void *)i, spin);
        }
 
diff --git a/tests/stack.c b/tests/stack.c
new file mode 100644 (file)
index 0000000..f910b28
--- /dev/null
@@ -0,0 +1,82 @@
+#include <signal.h>
+#include <pthread/stack_np.h>
+
+#include "darwintest_defaults.h"
+#include <darwintest_utils.h>
+
+#if defined(__arm64__)
+#define call_chkstk(value) \
+               __asm__ volatile("orr x9, xzr, %0\t\n" \
+                               "bl _thread_chkstk_darwin" : : "i"(value) : "x9")
+#define TRAPSIG SIGTRAP
+#elif defined(__x86_64__)
+#define call_chkstk(value) \
+               __asm__ volatile("movq %0, %%rax\t\n" \
+                               "callq _thread_chkstk_darwin" : : "i"(value) : "rax")
+#define TRAPSIG SIGILL
+#elif defined(__i386__)
+#define call_chkstk(value) \
+               __asm__ volatile("movl %0, %%eax\t\n" \
+                               "calll _thread_chkstk_darwin" : : "i"(value) : "eax")
+#define TRAPSIG SIGILL
+#endif
+
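+// Each call_chkstk variant loads the probe size into the scratch register
+// that thread_chkstk_darwin reads (x9/rax/eax) and calls it directly,
+// bypassing the normal C calling convention.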
+static void
+got_signal(int signo __unused)
+{
+       T_PASS("calling with 1 << 24 crashed");
+       T_END;
+}
+
+T_DECL(chkstk, "chkstk",
+               T_META_ALL_VALID_ARCHS(YES), T_META_CHECK_LEAKS(NO))
+{
+#if defined(__arm__)
+       T_SKIP("not on armv7");
+#else
+
+       call_chkstk(1 << 8);
+       T_PASS("calling with 1 << 8");
+
+       call_chkstk(1 << 16);
+       T_PASS("calling with 1 << 16");
+
+       signal(TRAPSIG, got_signal);
+
+       call_chkstk(1 << 24);
+       T_FAIL("should have crashed");
+#endif
+}
+
+struct frame {
+       uintptr_t frame;
+       uintptr_t ret;
+};
+
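+// Builds a chain of real stack frames by recursing to the requested depth,
+// recording each level's frame pointer and return address on the way down,
+// then walks back up with pthread_stack_frame_decode_np and checks that it
+// reports the same pairs in reverse order.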
+OS_NOINLINE OS_NOT_TAIL_CALLED
+static void
+do_stack_frame_decode_test(struct frame frames[], size_t n, size_t count)
+{
+       if (n < count) {
+               frames[n].frame = (uintptr_t)__builtin_frame_address(1);
+               frames[n].ret = (uintptr_t)__builtin_return_address(0);
+               do_stack_frame_decode_test(frames, n + 1, count);
+       } else {
+               uintptr_t frame = (uintptr_t)__builtin_frame_address(1);
+               uintptr_t ret;
+               while (count-- > 0) {
+                       frame = pthread_stack_frame_decode_np(frame, &ret);
+                       T_EXPECT_EQ(frames[count].frame, frame, "Frame %zd", count);
+                       T_EXPECT_EQ(frames[count].ret, ret, "Retaddr %zd", count);
+               }
+       }
+}
+
+T_DECL(pthread_stack_frame_decode_np, "pthread_stack_frame_decode_np",
+               T_META_ALL_VALID_ARCHS(YES), T_META_CHECK_LEAKS(NO))
+{
+       struct frame frames[10];
+       frames[0].frame = (uintptr_t)__builtin_frame_address(1);
+       frames[0].ret = (uintptr_t)__builtin_return_address(0);
+       do_stack_frame_decode_test(frames, 1, 10);
+}
index a8dab42eb170c3b69a283b0126a0161220b28008..aaf483ebc8f58d1527c2cbc980c452c5241f53e9 100644 (file)
@@ -133,7 +133,7 @@ again:
 
        for (int i = 0; i < attempts; i++) {
                char *t;
-               asprintf(&t, "%s/%zd", tmp, i);
+               asprintf(&t, "%s/%d", tmp, i);
                T_QUIET; T_ASSERT_POSIX_SUCCESS(mkdir(t, 0700), "mkdir");
                setenv("BATS_TMP_DIR", t, 1); // hack to workaround rdar://33443485
                free(t);
@@ -144,7 +144,7 @@ again:
                                T_QUIET; T_FAIL("Helper should complete in <.1s");
                                goto timeout;
                        }
-                       usleep(1000);
+                       usleep(1000 * 100);
                } while (shmem->done <= i);
        }
        setenv("BATS_TMP_DIR", tmpdir, 1);
diff --git a/tests/stack_size.c b/tests/stack_size.c
new file mode 100644 (file)
index 0000000..3a52747
--- /dev/null
@@ -0,0 +1,81 @@
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "darwintest_defaults.h"
+
+#define PTHREAD_T_OFFSET (0)
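+// PTHREAD_T_OFFSET is presumably a hook for configurations where the
+// pthread_t is carved out of the stack allocation and so counts toward the
+// reported stack size; zero here means the sizes should match exactly.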
+
+static void *
+function(void *arg)
+{
+       size_t expected_size = (size_t)(uintptr_t)arg;
+       T_ASSERT_EQ(pthread_get_stacksize_np(pthread_self()), expected_size,
+                       "saw expected pthread_get_stacksize_np");
+       return NULL;
+}
+
+T_DECL(stack_size_default, "stack size of default pthread",
+               T_META_ALL_VALID_ARCHS(YES))
+{
+       static const size_t dflsize = 512 * 1024;
+       pthread_t thread;
+       pthread_attr_t attr;
+
+       T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function,
+                       (void *)(dflsize + PTHREAD_T_OFFSET)), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);
+}
+
+T_DECL(stack_size_customsize, "stack size of thread with custom stack size",
+               T_META_ALL_VALID_ARCHS(YES))
+{
+       static const size_t stksize = 768 * 1024;
+       pthread_t thread;
+       pthread_attr_t attr;
+
+       T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_attr_setstacksize(&attr, stksize), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function,
+                       (void *)(stksize + PTHREAD_T_OFFSET)), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);
+}
+
+T_DECL(stack_size_customaddr, "stack size of thread with custom stack addr",
+               T_META_ALL_VALID_ARCHS(YES))
+{
+       static const size_t stksize = 512 * 1024;
+       pthread_t thread;
+       pthread_attr_t attr;
+
+       uintptr_t stackaddr = (uintptr_t)valloc(stksize);
+       stackaddr += stksize; // address is top of stack
+
+       T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_attr_setstackaddr(&attr, (void *)stackaddr),
+                       NULL);
+       T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function,
+                       (void *)stksize), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);
+       free((void *)(stackaddr - stksize));
+}
+
+T_DECL(stack_size_custom, "stack size of thread with custom stack addr+size",
+               T_META_ALL_VALID_ARCHS(YES))
+{
+       static const size_t stksize = 768 * 1024;
+       pthread_t thread;
+       pthread_attr_t attr;
+
+       uintptr_t stackaddr = (uintptr_t)valloc(stksize);
+       stackaddr += stksize; // address is top of stack
+
+       T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_attr_setstackaddr(&attr, (void *)stackaddr),
+                       NULL);
+       T_ASSERT_POSIX_ZERO(pthread_attr_setstacksize(&attr, stksize), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function,
+                       (void *)stksize), NULL);
+       T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);
+       free((void *)(stackaddr - stksize));
+}
index ecc64bc1987d46e94b7aa9873277a755bba39e8f..bb5380de1a4aa166537211c9f27d0bab5463e577 100755 (executable)
@@ -28,10 +28,26 @@ get_prefix = function(buf)
        local proc
        proc = buf.command
 
-       return string.format("%s %6.9f %-17s [%05d.%06x] %-24s",
+       return string.format("%s %6.9f %-17s [%05d.%06x] %-35s",
                prefix, secs, proc, buf.pid, buf.threadid, buf.debugname)
 end
 
+get_count = function(val)
+       return ((val & 0xffffff00) >> 8)
+end
+
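+-- The low byte of the kwq type word selects the object kind (0x1 mutex,
+-- 0x2 condvar, 0x4 rwlock); any other value is printed raw.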
+get_kwq_type = function(val)
+       if val & 0xff == 0x1 then
+               return "MTX"
+       elseif val & 0xff == 0x2 then
+               return "CVAR"
+       elseif val & 0xff == 0x4 then
+               return "RWL"
+       else
+               return string.format("0x%04x", val)
+       end
+end
+
 decode_lval = function(lval)
        local kbit = " "
        if lval & 0x1 ~= 0 then
@@ -61,61 +77,282 @@ decode_sval = function(sval)
        end
 
        local count = sval >> 8
-       return string.format("[0x%06x, %s%s]", count, ibit, sbit)
+       return string.format("[0x%06x,  %s%s]", count, ibit, sbit)
+end
+
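+-- cv sequence words keep the count in the upper 24 bits like decode_sval;
+-- the flag letters below (C, P) are taken from the bit masks and appear to
+-- mean signalled and preposted respectively.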
+decode_cv_sval = function(sval)
+       local sbit = " "
+       if sval & 0x1 ~= 0 then
+               sbit = "C"
+       end
+       local ibit = " "
+       if sval & 0x2 ~= 0 then
+               ibit = "P"
+       end
+
+       local count = sval >> 8
+       return string.format("[0x%06x,  %s%s]", count, ibit, sbit)
 end
 
 trace_codename("psynch_mutex_lock_updatebits", function(buf)
        local prefix = get_prefix(buf)
        if buf[4] == 0 then
-               printf("%s\tupdated lock bits, pre-kernel (addr: 0x%016x, oldlval: %s, newlval: %s)\n", prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]))
+               printf("%s\tupdated lock bits, pre-kernel\taddr: 0x%016x\toldl: %s\tnewl: %s\n",
+                               prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]))
        else
-               printf("%s\tupdated lock bits, post-kernel (addr: 0x%016x, oldlval: %s, newlval: %s)\n", prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]))
+               printf("%s\tupdated lock bits, post-kernel\taddr: 0x%016x\toldl: %s\tnewl: %s\n",
+                               prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]))
        end
 end)
 
 trace_codename("psynch_mutex_unlock_updatebits", function(buf)
        local prefix = get_prefix(buf)
-       printf("%s\tupdated unlock bits (addr: 0x%016x, oldlval: %s, newlval: %s)\n", prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]))
+       printf("%s\tupdated unlock bits\t\taddr: 0x%016x\toldl: %s\tnewl: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]))
+end)
+
+trace_codename("psynch_ffmutex_lock_updatebits", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tlock path, bits update\t\taddr: 0x%016x\toldl: %s\toldu: %s\twaiters: %d\n",
+                               prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), get_count(buf[2]) - get_count(buf[3]))
+       else
+               printf("%s\tlock path, bits update\t\taddr: 0x%016x\tnewl: %s\tnewu: %s\twaiters: %d\n",
+                               prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), get_count(buf[2]) - get_count(buf[3]))
+       end
+end)
+
+trace_codename("psynch_ffmutex_unlock_updatebits", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tunlock path, update bits\taddr: 0x%016x\toldl: %s\tnewl: %s\tnewu: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), decode_sval(buf[4]))
+end)
+
+trace_codename("psynch_ffmutex_wake", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tfirst fit kernel wake\t\taddr: 0x%016x\tlval: %s\tuval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]))
+end)
+
+trace_codename("psynch_ffmutex_wait", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tfirstfit kernel wait\t\taddr: 0x%016x\tlval: %s\tuval: %s\tflags: 0x%x\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4])
+       else
+               printf("%s\tfirstfit kernel wait\t\taddr: 0x%016x\trval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]))
+       end
 end)
 
 trace_codename("psynch_mutex_ulock", function(buf)
        local prefix = get_prefix(buf)
 
        if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tlock busy, waiting in kernel (addr: 0x%016x, lval: %s, sval: %s, owner_tid: 0x%x)\n",
+               printf("%s\tlock busy, waiting in kernel\taddr: 0x%016x\tlval: %s\tsval: %s\towner_tid: 0x%x\n",
                        prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4])
        elseif trace.debugid_is_end(buf.debugid) then
-               printf("%s\tlock acquired from kernel (addr: 0x%016x, updated bits: %s)\n",
+               printf("%s\tlock acquired from kernel\taddr: 0x%016x\tupdt: %s\n",
                        prefix, buf[1], decode_lval(buf[2]))
        else
-               printf("%s\tlock taken, uncontended (addr: 0x%016x, lval: %s, sval: %s)\n",
+               printf("%s\tlock taken userspace\t\taddr: 0x%016x\tlval: %s\tsval: %s\n",
                        prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]))
        end
 end)
 
 trace_codename("psynch_mutex_utrylock_failed", function(buf)
        local prefix = get_prefix(buf)
-       printf("%s\tmutex trybusy addr: 0x%016x lval: %s sval: %s owner: 0x%x\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4])
+       printf("%s\tmutex trybusy\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\towner: 0x%x\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4])
 end)
 
 trace_codename("psynch_mutex_uunlock", function(buf)
        local prefix = get_prefix(buf)
 
        if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tunlock, signalling kernel waiters (addr: 0x%016x, lval: %s, sval: %s, owner_tid: 0x%x)\n",
+               printf("%s\tunlock, signalling kernel\taddr: 0x%016x\tlval: %s\tsval: %s\towner_tid: 0x%x\n",
                        prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4])
        elseif trace.debugid_is_end(buf.debugid) then
-               printf("%s\tunlock, waiters signalled (addr: 0x%016x, updated bits: %s)\n",
+               printf("%s\tunlock, waiters signalled\taddr: 0x%016x\tupdt: %s\n",
                        prefix, buf[1], decode_lval(buf[2]))
        else
-               printf("%s\tunlock, no kernel waiters (addr: 0x%016x, lval: %s, sval: %s)\n",
+               printf("%s\tunlock, no kernel waiters\taddr: 0x%016x\tlval: %s\tsval: %s\n",
                        prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]))
        end
 end)
 
--- The trace codes we need aren't enabled by default
-darwin.sysctlbyname("kern.pthread_debug_tracing", 1)
-completion_handler = function()
-       darwin.sysctlbyname("kern.pthread_debug_tracing", 0)
-end
-trace.set_completion_handler(completion_handler)
+trace_codename("psynch_mutex_clearprepost", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tclear prepost\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\n",
+               prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]))
+end)
+
+trace_codename("psynch_mutex_markprepost", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tmark prepost\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]))
+       else
+               printf("%s\tmark prepost\t\t\taddr: 0x%016x\tcleared: %d\n",
+                       prefix, buf[1], buf[2])
+       end
+end)
+
+trace_codename("psynch_mutex_kwqallocate", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tkernel kwq allocated\t\taddr: 0x%016x\ttype: %s\tkwq: 0x%016x\n",
+                       prefix, buf[1], get_kwq_type(buf[2]), buf[3])
+       elseif trace.debugid_is_end(buf.debugid) then
+               printf("%s\tkernel kwq allocated\t\taddr: 0x%016x\tlval: %s\tuval: %s\tsval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), decode_sval(buf[4]))
+       end
+end)
+
+trace_codename("psynch_mutex_kwqdeallocate", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tkernel kwq deallocated\t\taddr: 0x%016x\ttype: %s\tfreenow: %d\n",
+                       prefix, buf[1], get_kwq_type(buf[2]), buf[3])
+       elseif trace.debugid_is_end(buf.debugid) then
+               printf("%s\tkernel kwq deallocated\t\taddr: 0x%016x\tlval: %s\tuval: %s\tsval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), decode_sval(buf[4]))
+       end
+end)
+
+trace_codename("psynch_mutex_kwqprepost", function(buf)
+       local prefix = get_prefix(buf)
+       if buf[4] == 0 then
+               printf("%s\tkernel prepost incremented\taddr: 0x%016x\tlval: %s\tinqueue: %d\n",
+                       prefix, buf[1], decode_lval(buf[2]), buf[3])
+       elseif buf[4] == 1 then
+               printf("%s\tkernel prepost decremented\taddr: 0x%016x\tlval: %s\tremaining: %d\n",
+                       prefix, buf[1], decode_lval(buf[2]), buf[3])
+       elseif buf[4] == 2 then
+               printf("%s\tkernel prepost cleared\t\taddr: 0x%016x\tlval: %s\n", prefix,
+                       buf[1], decode_lval(buf[2]))
+       end
+end)
+
+trace_codename("psynch_mutex_kwqcollision", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tkernel kwq collision\t\taddr: 0x%016x\ttype: %d\n", prefix,
+               buf[1], buf[2])
+end)
+
+trace_codename("psynch_mutex_kwqsignal", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tkernel mutex signal\t\taddr: 0x%016x\tkwe: 0x%16x\ttid: 0x%x\tinqueue: %d\n",
+                       prefix, buf[1], buf[2], buf[3], buf[4]);
+       else
+               printf("%s\tkernel mutex signal\t\taddr: 0x%016x\tkwe: 0x%16x\tret: 0x%x\n",
+                       prefix, buf[1], buf[2], buf[3]);
+       end
+end)
+
+trace_codename("psynch_mutex_kwqwait", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tkernel mutex wait\t\taddr: 0x%016x\tinqueue: %d\tprepost: %d\tintr: %d\n",
+               prefix, buf[1], buf[2], buf[3], buf[4])
+end)
+
+trace_codename("psynch_cvar_kwait", function(buf)
+       local prefix = get_prefix(buf)
+       if buf[4] == 0 then
+               printf("%s\tkernel condvar wait\t\taddr: 0x%016x\tmutex: 0x%016x\tcgen: 0x%x\n",
+                       prefix, buf[1], buf[2], buf[3])
+       elseif buf[4] == 1 then
+               printf("%s\tkernel condvar sleep\t\taddr: 0x%016x\tflags: 0x%x\n",
+                       prefix, buf[1], buf[3])
+       elseif buf[4] == 2 then
+               printf("%s\tkernel condvar wait return\taddr: 0x%016x\terror: 0x%x\tupdt: 0x%x\n",
+                       prefix, buf[1], buf[2], buf[3])
+       elseif buf[4] == 3 and (buf[2] & 0xff) == 60 then
+               printf("%s\tkernel condvar timeout\t\taddr: 0x%016x\terror: 0x%x\n",
+                       prefix, buf[1], buf[2])
+       elseif buf[4] == 3 then
+               printf("%s\tkernel condvar wait error\taddr: 0x%016x\terror: 0x%x\n",
+                       prefix, buf[1], buf[2])
+       elseif buf[4] == 4 then
+               printf("%s\tkernel condvar wait return\taddr: 0x%016x\tupdt: 0x%x\n",
+                       prefix, buf[1], buf[2])
+       end
+end)
+
+trace_codename("psynch_cvar_clrprepost", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tkernel condvar clear prepost:\taddr: 0x%016x\ttype: 0x%x\tprepost seq: %s\n",
+               prefix, buf[1], buf[2], decode_lval(buf[3]))
+end)
+
+trace_codename("psynch_cvar_freeitems", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tcvar free fake/prepost items\taddr: 0x%016x\ttype: %d\t\t\tupto: %s\tall: %d\n",
+                       prefix, buf[1], buf[2], decode_lval(buf[3]), buf[4])
+       elseif trace.debugid_is_end(buf.debugid) then
+               printf("%s\tcvar free fake/prepost items\taddr: 0x%016x\tfreed: %d\tsignaled: %d\tinqueue: %d\n",
+                       prefix, buf[1], buf[2], buf[3], buf[4])
+       elseif buf[4] == 1 then
+               printf("%s\tcvar free, signalling waiter\taddr: 0x%016x\tinqueue: %d\tkwe: 0x%016x\n",
+                       prefix, buf[1], buf[3], buf[2])
+       elseif buf[4] == 2 then
+               printf("%s\tcvar free, removing fake\taddr: 0x%016x\tinqueue: %d\tkwe: 0x%016x\n",
+                       prefix, buf[1], buf[3], buf[2])
+       end
+end)
+
+trace_codename("psynch_cvar_signal", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tkernel cvar signal\t\taddr: 0x%016x\tfrom: %s\tupto: %s\tbroad: %d\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), buf[4])
+       elseif trace.debugid_is_end(buf.debugid) then
+               printf("%s\tkernel cvar signal\t\taddr: 0x%016x\tupdt: %s\n",
+                       prefix, buf[1], decode_cv_sval(buf[2]))
+       else
+               printf("%s\tkernel cvar signal\t\taddr: 0x%016x\tsignalled waiters (converted to broadcast: %d)\n",
+                       prefix, buf[1], buf[2])
+       end
+end)
+
+trace_codename("psynch_cvar_broadcast", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tupto: %s\tinqueue: %d\n",
+                       prefix, buf[1], decode_lval(buf[2]), buf[3])
+       elseif trace.debugid_is_end(buf.debugid) then
+               printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tupdt: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]))
+       elseif buf[4] == 1 then
+               printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tsignalling: 0x%16x\n",
+                       prefix, buf[1], buf[2])
+       elseif buf[4] == 2 then
+               printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tremoving fake: 0x%16x\tstate: %d\n",
+                       prefix, buf[1], buf[2], buf[3])
+       elseif buf[4] == 3 then
+               printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tprepost\tlval: %s\tsval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2]), decode_cv_sval(buf[3]))
+       elseif buf[4] == 4 then
+               printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tbroadcast prepost: 0x%016x\n",
+                       prefix, buf[1], buf[2])
+       end
+end)
+
+trace_codename("psynch_cvar_zeroed", function(buf)
+       local prefix = get_prefix(buf)
+       printf("%s\tkernel cvar zeroed\t\taddr: 0x%016x\tlval: %s\tsval: %s\tinqueue: %d\n",
+               prefix, buf[1], decode_lval(buf[2]), decode_cv_sval(buf[3]), buf[4])
+end)
+
+trace_codename("psynch_cvar_updateval", function(buf)
+       local prefix = get_prefix(buf)
+       if trace.debugid_is_start(buf.debugid) then
+               printf("%s\tcvar updateval\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\tupdateval: %s\n",
+                       prefix, buf[1], decode_lval(buf[2] & 0xffffffff), decode_cv_sval(buf[2] >> 32), decode_cv_sval(buf[3]))
+       elseif trace.debugid_is_end(buf.debugid) then
+               printf("%s\tcvar updateval (updated)\taddr: 0x%016x\tlval: %s\tsval: %s\tdiffgen: %d\tneedsclear: %d\n",
+                       prefix, buf[1], decode_lval(buf[2] & 0xffffffff), decode_cv_sval(buf[2] >> 32), buf[3] >> 32, buf[3] & 0x1)
+       end
+end)
+
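
Each of the handlers added above follows the same shape: resolve a kdebug code by name, register a callback for it, and format the four trace argument words. A minimal sketch of that pattern, assuming the luatrace API these tools already use (trace.debugid/trace.single via the trace_codename helper, plus locktrace.lua's get_prefix and decode_lval helpers); the codename "psynch_mutex_example" is hypothetical:

    -- Minimal sketch; "psynch_mutex_example" is a hypothetical codename.
    trace_codename("psynch_mutex_example", function(buf)
        local prefix = get_prefix(buf)  -- arrow, timestamp, process, thread
        if trace.debugid_is_start(buf.debugid) then
            printf("%s\tenter\taddr: 0x%016x\tlval: %s\n",
                prefix, buf[1], decode_lval(buf[2]))
        elseif trace.debugid_is_end(buf.debugid) then
            printf("%s\texit\taddr: 0x%016x\tret: 0x%x\n",
                prefix, buf[1], buf[2])
        end
    end)
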
diff --git a/tools/wqtrace.lua b/tools/wqtrace.lua
deleted file mode 100755 (executable)
index 2da03da..0000000
--- a/tools/wqtrace.lua
+++ /dev/null
@@ -1,349 +0,0 @@
-#!/usr/local/bin/luatrace -s
-
-trace_codename = function(codename, callback)
-       local debugid = trace.debugid(codename)
-       if debugid ~= 0 then
-               trace.single(debugid,callback)
-       else
-               printf("WARNING: Cannot locate debugid for '%s'\n", codename)
-       end
-end
-
-initial_timestamp = 0
-workqueue_ptr_map = {};
-get_prefix = function(buf)
-       if initial_timestamp == 0 then
-               initial_timestamp = buf.timestamp
-       end
-       local secs = trace.convert_timestamp_to_nanoseconds(buf.timestamp - initial_timestamp) / 1000000000
-
-       local prefix
-       if trace.debugid_is_start(buf.debugid) then
-               prefix = "→"
-       elseif trace.debugid_is_end(buf.debugid) then
-               prefix = "←"
-       else
-               prefix = "↔"
-       end
-
-       local proc
-       if buf.command ~= "kernel_task" then
-               proc = buf.command
-               workqueue_ptr_map[buf[1]] = buf.command
-       elseif workqueue_ptr_map[buf[1]] ~= nil then
-               proc = workqueue_ptr_map[buf[1]]
-       else
-               proc = "UNKNOWN"
-       end
-
-       return string.format("%s %6.9f %-17s [%05d.%06x] %-24s",
-               prefix, secs, proc, buf.pid, buf.threadid, buf.debugname)
-end
-
-parse_pthread_priority = function(pri)
-       pri = pri & 0xffffffff
-       if (pri & 0x02000000) == 0x02000000 then
-               return "Manager"
-       end
-       local qos = (pri & 0x00ffff00) >> 8
-       if qos == 0x20 then
-               return string.format("UI[%x]", pri);
-       elseif qos == 0x10 then
-               return string.format("IN[%x]", pri);
-       elseif qos == 0x08 then
-               return string.format("DF[%x]", pri);
-       elseif qos == 0x04 then
-               return string.format("UT[%x]", pri);
-       elseif qos == 0x02 then
-               return string.format("BG[%x]", pri);
-       elseif qos == 0x01 then
-               return string.format("MT[%x]", pri);
-       elseif qos == 0x00 then
-               return string.format("--[%x]", pri);
-       else
-               return string.format("??[%x]", pri);
-       end
-end
-
-parse_qos_bucket = function(pri)
-       if pri == 0 then
-               return string.format("UI[%x]", pri);
-       elseif pri == 1 then
-               return string.format("IN[%x]", pri);
-       elseif pri == 2 then
-               return string.format("DF[%x]", pri);
-       elseif pri == 3 then
-               return string.format("UT[%x]", pri);
-       elseif pri == 4 then
-               return string.format("BG[%x]", pri);
-       elseif pri == 5 then
-               return string.format("MT[%x]", pri);
-       elseif pri == 6 then
-               return string.format("MG[%x]", pri);
-       else
-               return string.format("??[%x]", pri);
-       end
-end
-
-parse_thactive_req_bucket = function(pri)
-    if pri ~= 6 then
-        return parse_qos_bucket(pri)
-    end
-    return "None"
-end
-
-get_thactive = function(low, high)
-    return string.format("req: %s, MG: %d, UI: %d, IN: %d, DE: %d, UT: %d, BG: %d, MT: %d",
-           parse_thactive_req_bucket(high >> (16 * 3)), (high >> (2 * 16)) & 0xffff,
-           (low  >> (0 * 16)) & 0xffff, (low  >> (1 * 16)) & 0xffff,
-           (low  >> (2 * 16)) & 0xffff, (low  >> (3 * 16)) & 0xffff,
-           (high >> (0 * 16)) & 0xffff, (high >> (1 * 16)) & 0xffff)
-end
-
--- workqueue lifecycle
-
-trace_codename("wq_pthread_exit", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tprocess is exiting\n",prefix)
-       else
-               printf("%s\tworkqueue marked as exiting and timer is complete\n",prefix)
-       end
-end)
-
-trace_codename("wq_workqueue_exit", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tall threads have exited, cleaning up\n",prefix)
-       else
-               printf("%s\tclean up complete\n",prefix)
-       end
-end)
-
-trace_codename("wq_start_add_timer", function(buf)
-       local prefix = get_prefix(buf)
-       printf("%s\tarming timer to fire in %d us (flags: %x, reqcount: %d)\n",
-               prefix, buf.arg4, buf.arg3, buf.arg2)
-end)
-
-trace_codename("wq_add_timer", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tadd_timer fired (flags: %x, nthreads: %d, thidlecount: %d)\n",
-                       prefix, buf.arg2, buf.arg3, buf.arg4)
-       elseif trace.debugid_is_end(buf.debugid) then
-               printf("%s\tadd_timer completed (start_timer: %x, nthreads: %d, thidlecount: %d)\n",
-                       prefix, buf.arg2, buf.arg3, buf.arg4)
-       else
-               printf("%s\tadd_timer added threads (reqcount: %d, thidlecount: %d, busycount: %d)\n",
-                       prefix, buf.arg2, buf.arg3, buf.arg4)
-
-       end
-end)
-
-trace_codename("wq_run_threadreq", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               if buf[2] > 0 then
-                       printf("%s\trun_threadreq: %x (priority: %s, flags: %d) on %x\n",
-                                       prefix, buf[2], parse_qos_bucket(buf[4] >> 16), buf[4] & 0xff, buf[3])
-               else
-                       printf("%s\trun_threadreq: <none> on %x\n",
-                                       prefix, buf[3])
-               end
-       else
-               if buf[2] == 1 then
-                       printf("%s\tpended event manager, already running\n", prefix)
-               elseif buf[2] == 2 then
-                       printf("%s\tnothing to do\n", prefix)
-               elseif buf[2] == 3 then
-                       printf("%s\tno eligible request found\n", prefix)
-               elseif buf[2] == 4 then
-                       printf("%s\tadmission control failed\n", prefix)
-               elseif buf[2] == 5 then
-                       printf("%s\tunable to add new thread (may_add_new_thread: %d, nthreads: %d)\n", prefix, buf[3], buf[4])
-               elseif buf[2] == 6 then
-                       printf("%s\tthread creation failed\n", prefix)
-               elseif buf[2] == 0 then
-                       printf("%s\tsuccess\n", prefix)
-               else
-                       printf("%s\tWARNING: UNKNOWN END CODE:%d\n", prefix, buf.arg4)
-               end
-       end
-end)
-
-trace_codename("wq_run_threadreq_mgr_merge", function(buf)
-       local prefix = get_prefix(buf)
-       printf("%s\t\tmerging incoming manager request into existing\n", prefix)
-end)
-
-trace_codename("wq_run_threadreq_req_select", function(buf)
-       local prefix = get_prefix(buf)
-       if buf[3] == 1 then
-               printf("%s\t\tselected event manager request %x\n", prefix, buf[2])
-       elseif buf[3] == 2 then
-               printf("%s\t\tselected overcommit request %x\n", prefix, buf[2])
-       elseif buf[3] == 3 then
-               printf("%s\t\tselected constrained request %x\n", prefix, buf[2])
-       else
-               printf("%s\t\tWARNING: UNKNOWN DECISION CODE:%d\n", prefix, buf.arg[3])
-       end
-end)
-
-trace_codename("wq_run_threadreq_thread_select", function(buf)
-       local prefix = get_prefix(buf)
-       if buf[2] == 1 then
-               printf("%s\t\trunning on current thread %x\n", prefix, buf[3])
-       elseif buf[2] == 2 then
-               printf("%s\t\trunning on idle thread %x\n", prefix, buf[3])
-       elseif buf[2] == 3 then
-               printf("%s\t\tcreated new thread\n", prefix)
-       else
-               printf("%s\t\tWARNING: UNKNOWN DECISION CODE:%d\n", prefix, buf.arg[2])
-       end
-end)
-
-trace_codename("wq_thread_reset_priority", function(buf)
-       local prefix = get_prefix(buf)
-       local old_qos = buf[3] >> 16;
-       local new_qos = buf[3] & 0xff;
-       if buf[4] == 1 then
-               printf("%s\t\treset priority of %x from %s to %s\n", prefix, buf[2], parse_qos_bucket(old_qos), parse_qos_bucket(new_qos))
-       elseif buf[4] == 2 then
-               printf("%s\t\treset priority of %x from %s to %s for reserve manager\n", prefix, buf[2], parse_qos_bucket(old_qos), parse_qos_bucket(new_qos))
-       elseif buf[4] == 3 then
-               printf("%s\t\treset priority of %x from %s to %s for cleanup\n", prefix, buf[2], parse_qos_bucket(old_qos), parse_qos_bucket(new_qos))
-       end
-end)
-
-trace_codename("wq_thread_park", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tthread parking\n", prefix)
-       else
-               printf("%s\tthread woken\n", prefix)
-       end
-end)
-
-trace_codename("wq_thread_squash", function(buf)
-       local prefix = get_prefix(buf)
-       printf("%s\tthread squashed from %s to %s\n", prefix,
-                       parse_qos_bucket(buf[2]), parse_qos_bucket(buf[3]))
-end)
-
-trace.enable_thread_cputime()
-runitem_time_map = {}
-runitem_cputime_map = {}
-trace_codename("wq_runitem", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               runitem_time_map[buf.threadid] = buf.timestamp;
-               runitem_cputime_map[buf.threadid] = trace.cputime_for_thread(buf.threadid);
-
-               printf("%s\tSTART running item @ %s\n", prefix, parse_qos_bucket(buf[3]))
-       elseif runitem_time_map[buf.threadid] then
-               local time = buf.timestamp - runitem_time_map[buf.threadid]
-               local cputime = trace.cputime_for_thread(buf.threadid) - runitem_cputime_map[buf.threadid]
-
-               local time_ms = trace.convert_timestamp_to_nanoseconds(time) / 1000000
-               local cputime_ms = trace.convert_timestamp_to_nanoseconds(cputime) / 1000000
-
-               printf("%s\tDONE running item @ %s: time = %6.6f ms, cputime = %6.6f ms\n",
-                               prefix, parse_qos_bucket(buf[2]), time_ms, cputime_ms)
-
-               runitem_time_map[buf.threadid] = 0
-               runitem_cputime_map[buf.threadid] = 0
-       else
-               printf("%s\tDONE running item @ %s\n", prefix, parse_qos_bucket(buf[2]))
-       end
-end)
-
-trace_codename("wq_runthread", function(buf)
-       local prefix = get_prefix(buf)
-       if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tSTART running thread\n", prefix)
-       elseif trace.debugid_is_end(buf.debugid) then
-               printf("%s\tDONE running thread\n", prefix)
-       end
-end)
-
-trace_codename("wq_thactive_update", function(buf)
-    local prefix = get_prefix(buf)
-    local thactive = get_thactive(buf[2], buf[3])
-    if buf[1] == 1 then
-        printf("%s\tthactive constrained pre-post (%s)\n", prefix, thactive)
-    elseif buf[1] == 2 then
-        printf("%s\tthactive constrained run (%s)\n", prefix, thactive)
-    else
-        return
-    end
-end)
-
-trace_codename("wq_thread_block", function(buf)
-       local prefix = get_prefix(buf)
-        local req_pri = parse_thactive_req_bucket(buf[3] >> 8)
-       if trace.debugid_is_start(buf.debugid) then
-               printf("%s\tthread blocked (activecount: %d, priority: %s, req_pri: %s, reqcount: %d, start_timer: %d)\n",
-                       prefix, buf[2], parse_qos_bucket(buf[3] & 0xff), req_pri, buf[4] >> 1, buf[4] & 0x1)
-       else
-               printf("%s\tthread unblocked (activecount: %d, priority: %s, req_pri: %s, threads_scheduled: %d)\n",
-                       prefix, buf[2], parse_qos_bucket(buf[3] & 0xff), req_pri, buf[4])
-       end
-end)
-
-trace_codename("wq_thread_create_failed", function(buf)
-       local prefix = get_prefix(buf)
-       if buf[3] == 0 then
-               printf("%s\tfailed to create new workqueue thread, kern_return: 0x%x\n",
-                       prefix, buf[2])
-       elseif buf[3] == 1 then
-               printf("%s\tfailed to vm_map workq thread stack: 0x%x\n", prefix, buf[2])
-       elseif buf[3] == 2 then
-               printf("%s\tfailed to vm_protect workq thread guardsize: 0x%x\n", prefix, buf[2])
-       end
-end)
-
-trace_codename("wq_thread_create", function(buf)
-       printf("%s\tcreated new workqueue thread\n", get_prefix(buf))
-end)
-
-trace_codename("wq_wqops_reqthreads", function(buf)
-       local prefix = get_prefix(buf)
-       printf("%s\tuserspace requested %d threads at %s\n", prefix, buf[2], parse_pthread_priority(buf[3]));
-end)
-
-trace_codename("wq_kevent_reqthreads", function(buf)
-       local prefix = get_prefix(buf)
-       if buf[4] == 0 then
-               printf("%s\tkevent requested a thread at %s\n", prefix, parse_pthread_priority(buf[3]));
-       elseif buf[4] == 1 then
-               printf("%s\tworkloop requested a thread for req %x at %s\n", prefix, buf[2], parse_pthread_priority(buf[3]));
-       elseif buf[4] == 2 then
-               printf("%s\tworkloop updated priority of req %x to %s\n", prefix, buf[2], parse_pthread_priority(buf[3]));
-       elseif buf[4] == 3 then
-               printf("%s\tworkloop canceled req %x\n", prefix, buf[2], parse_pthread_priority(buf[3]));
-       elseif buf[4] == 4 then
-               printf("%s\tworkloop redrove a thread request\n", prefix);
-       end
-end)
-
-trace_codename("wq_constrained_admission", function(buf)
-       local prefix = get_prefix(buf)
-       if buf[2] == 1 then
-               printf("fail: %s\twq_constrained_threads_scheduled=%d >= wq_max_constrained_threads=%d\n",
-                prefix, buf[3], buf[4])
-       elseif (buf[2] == 2) or (buf[2] == 3) then
-               local success = nil;
-               if buf[2] == 2 then success = "success"
-               else success = "fail" end
-               printf("%s: %s\tthactive_count=%d + busycount=%d >= wq->wq_max_concurrency\n",
-                               prefix, success, buf[3], buf[4])
-       end
-end)
-
--- The trace codes we need aren't enabled by default
-darwin.sysctlbyname("kern.pthread_debug_tracing", 1)
-completion_handler = function()
-       darwin.sysctlbyname("kern.pthread_debug_tracing", 0)
-end
-trace.set_completion_handler(completion_handler)
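
Both tools guarded their trace sessions with the same idiom, removed from locktrace.lua earlier in this diff and deleted here along with wqtrace.lua: enable the pthread trace codes via sysctl, then restore the default in a completion handler. For reference, a minimal sketch of that idiom as it appeared in both tools:

    -- The pthread trace codes aren't enabled by default.
    darwin.sysctlbyname("kern.pthread_debug_tracing", 1)
    trace.set_completion_handler(function()
        darwin.sysctlbyname("kern.pthread_debug_tracing", 0)
    end)
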
diff --git a/xcodescripts/install-lldbmacros.sh b/xcodescripts/install-lldbmacros.sh
index e50ee44abe044946da915f5183af36116a81e5c6..9501f969e6bd98298022414bb311804ac367b7ed 100644 (file)
@@ -2,4 +2,17 @@
 # install the pthread lldbmacros into the module
 
 mkdir -p $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python || true
-rsync -aq $SRCROOT/lldbmacros/* $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python/
+rsync -aq $SRCROOT/lldbmacros/* $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python
+
+for variant in $BUILD_VARIANTS; do
+       case $variant in
+       normal)
+               SUFFIX=""
+               ;;
+       *)
+               SUFFIX="_$variant"
+               ;;
+       esac
+
+       ln -sf init.py $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python/$EXECUTABLE_NAME$SUFFIX.py
+done
diff --git a/xcodescripts/kext.xcconfig b/xcodescripts/kext.xcconfig
index fcd42ea2477ea2d531e600d310881b99ad94be5a..84e90796bddc820d2695bfef0642ac03a1429923 100644 (file)
@@ -41,7 +41,7 @@ LLVM_LTO_development = NO
 LLVM_LTO_kasan = NO
 LLVM_LTO = $(LLVM_LTO_$(PTHREAD_VARIANT))
 
-GCC_PREPROCESSOR_DEFINITIONS_kext = XNU_KERNEL_PRIVATE MACH_KERNEL_PRIVATE ABSOLUTETIME_SCALAR_TYPE NEEDS_SCHED_CALL_T
+GCC_PREPROCESSOR_DEFINITIONS_kext = XNU_KERNEL_PRIVATE MACH_KERNEL_PRIVATE ABSOLUTETIME_SCALAR_TYPE NEEDS_SCHED_CALL_T __PTHREAD_EXPOSE_INTERNALS__
 GCC_PREPROCESSOR_DEFINITIONS_kext_development = MACH_ASSERT DEBUG
 GCC_PREPROCESSOR_DEFINITIONS = $(GCC_PREPROCESSOR_DEFINITIONS_kext) $(GCC_PREPROCESSOR_DEFINITIONS_kext_$(PTHREAD_VARIANT))
 
diff --git a/xcodescripts/pthread.dirty b/xcodescripts/pthread.dirty
new file mode 100644 (file)
index 0000000..2a8f66e
--- /dev/null
+++ b/xcodescripts/pthread.dirty
@@ -0,0 +1,33 @@
+# cacheline-aligned
+
+# uint64_t sized
+___pthread_stack_hint
+
+# pointer-sized
+___libdispatch_keventfunction
+___libdispatch_workerfunction
+___libdispatch_workloopfunction
+___pthread_head
+__main_thread_ptr
+__pthread_free
+__pthread_keys
+__pthread_malloc
+__pthread_ptr_munge_token
+_exitf
+
+# int-sized
+___is_threaded
+___libdispatch_offset
+___pthread_supported_features
+___pthread_tsd_lock
+___pthread_tsd_max
+___unix_conforming
+__main_qos
+__pthread_count
+__pthread_list_lock
+
+# byte-sized
+___workq_newapi
+_default_priority
+_max_priority
+_min_priority
diff --git a/xcodescripts/pthread.xcconfig b/xcodescripts/pthread.xcconfig
index 7b2f244451ac6e034c58655f3a4dfe0024134d22..1dedcaa5a2353f880edeef01161f7aff885d34e0 100644 (file)
@@ -57,7 +57,7 @@ DISABLED_WARNING_CFLAGS = -Wno-int-conversion -Wno-missing-prototypes -Wno-sign-
 WARNING_CFLAGS = -Wall -Wextra -Warray-bounds-pointer-arithmetic -Wcomma -Wconditional-uninitialized -Wcovered-switch-default -Wdate-time -Wdeprecated -Wdouble-promotion -Wduplicate-enum -Wfloat-equal -Widiomatic-parentheses -Wignored-qualifiers -Wimplicit-fallthrough -Wmissing-noreturn -Wnullable-to-nonnull-conversion -Wover-aligned -Wpointer-arith -Wstatic-in-inline -Wtautological-compare -Wunguarded-availability -Wunused $(NO_WARNING_CFLAGS) $(DISABLED_WARNING_CFLAGS)
 NO_WARNING_CFLAGS = -Wno-pedantic -Wno-bad-function-cast -Wno-c++98-compat-pedantic -Wno-cast-align -Wno-cast-qual -Wno-disabled-macro-expansion -Wno-documentation-unknown-command -Wno-format-nonliteral -Wno-missing-variable-declarations -Wno-packed -Wno-padded -Wno-reserved-id-macro -Wno-switch-enum -Wno-undef -Wno-unreachable-code-aggressive -Wno-unused-macros -Wno-used-but-marked-unused
 
-BASE_PREPROCESSOR_MACROS = __LIBC__ __DARWIN_UNIX03=1 __DARWIN_64_BIT_INO_T=1 __DARWIN_NON_CANCELABLE=1 __DARWIN_VERS_1050=1 _FORTIFY_SOURCE=0 __PTHREAD_BUILDING_PTHREAD__=1 $(SIM_PREPROCESSOR_MACROS)
+BASE_PREPROCESSOR_MACROS = __LIBC__ __DARWIN_UNIX03=1 __DARWIN_64_BIT_INO_T=1 __DARWIN_NON_CANCELABLE=1 __DARWIN_VERS_1050=1 _FORTIFY_SOURCE=0 __PTHREAD_BUILDING_PTHREAD__=1 $(SIM_PREPROCESSOR_MACROS) __PTHREAD_EXPOSE_INTERNALS__
 GCC_PREPROCESSOR_DEFINITIONS = $(BASE_PREPROCESSOR_MACROS) $(PLATFORM_PREPROCESSOR_DEFINITIONS)
 
 // TODO: Remove -fstack-protector on _debug when it is moved to libplatform
@@ -68,8 +68,10 @@ OTHER_CFLAGS_debug = -fno-inline -O0 -DDEBUG=1
 LINK_WITH_STANDARD_LIBRARIES = NO
 DYLIB_CURRENT_VERSION = $(RC_ProjectSourceVersion)
 DYLIB_COMPATIBILITY_VERSION = 1
+DIRTY_LDFLAGS = -Wl,-dirty_data_list,$(SRCROOT)/xcodescripts/pthread.dirty
+DIRTY_LDFLAGS[sdk=macos*] =
 DYLIB_LDFLAGS = -Wl,-alias_list,$(SRCROOT)/xcodescripts/pthread.aliases -Wl,-umbrella,System -L/usr/lib/system -lsystem_kernel -lsystem_platform -ldyld -lcompiler_rt
-OTHER_LDFLAGS = $(DYLIB_LDFLAGS) $(CR_LDFLAGS) $(PLATFORM_LDFLAGS)
+OTHER_LDFLAGS = $(DYLIB_LDFLAGS) $(DIRTY_LDFLAGS) $(CR_LDFLAGS) $(PLATFORM_LDFLAGS)
 
 // Simulator build rules
 EXCLUDED_SOURCE_FILE_NAMES[sdk=iphonesimulator*] = *.c *.s
diff --git a/xcodescripts/resolved.xcconfig b/xcodescripts/resolved.xcconfig
index 2b33118d6124a08ce3e573792e5bb6b9b90c60f1..863252aedf7a23ab0cc70e38d85825ad927de98e 100644 (file)
@@ -1,6 +1,6 @@
 #include "pthread.xcconfig"
 
-SUPPORTED_PLATFORMS = iphoneos appletvos watchos
+SUPPORTED_PLATFORMS = iphoneos
 PRODUCT_NAME = pthread_$(RESOLVED_VARIANT)
 OTHER_LDFLAGS =
 SKIP_INSTALL = YES