/*
- * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
- *
+ *
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
- *
+ *
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#pragma mark - Front Matter
-#define _PTHREAD_CONDATTR_T
-#define _PTHREAD_COND_T
+#define _PTHREAD_CONDATTR_T
+#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#undef pthread_rwlock_t
#include <sys/cdefs.h>
+#include <os/log.h>
// <rdar://problem/26158937> panic() should be marked noreturn
extern void panic(const char *string, ...) __printflike(1,2) __dead2;
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
-#include <kern/sched_prim.h>
-#include <kern/kalloc.h>
+#include <kern/zalloc.h>
#include <kern/sched_prim.h> /* for thread_exception_return */
#include <kern/processor.h>
#include <kern/assert.h>
#include <libkern/OSAtomic.h>
#include <libkern/libkern.h>
-#include <sys/pthread_shims.h>
#include "kern_internal.h"
-// XXX: Dirty import for sys/signarvar.h that's wrapped in BSD_KERNEL_PRIVATE
-#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))
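+// Fallback in case the kernel headers being built against don't define this yet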
+#ifndef WQ_SETUP_EXIT_THREAD
+#define WQ_SETUP_EXIT_THREAD 8
+#endif
// XXX: Ditto for thread tags from kern/thread.h
#define THREAD_TAG_MAINTHREAD 0x1
lck_grp_t *pthread_lck_grp;
lck_attr_t *pthread_lck_attr;
-extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
-extern void workqueue_thread_yielded(void);
-
-enum run_nextreq_mode {
- RUN_NEXTREQ_DEFAULT,
- RUN_NEXTREQ_DEFAULT_KEVENT,
- RUN_NEXTREQ_OVERCOMMIT,
- RUN_NEXTREQ_OVERCOMMIT_KEVENT,
- RUN_NEXTREQ_DEFERRED_OVERCOMMIT,
- RUN_NEXTREQ_UNCONSTRAINED,
- RUN_NEXTREQ_EVENT_MANAGER,
- RUN_NEXTREQ_ADD_TIMER
-};
-static thread_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th,
- enum run_nextreq_mode mode, pthread_priority_t prio,
- bool kevent_bind_via_return);
-
-static boolean_t workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority);
-
-static void wq_runreq(proc_t p, thread_t th, struct workqueue *wq,
- struct threadlist *tl, boolean_t return_directly, boolean_t deferred_kevent);
-
-static void _setup_wqthread(proc_t p, thread_t th, struct workqueue *wq, struct threadlist *tl, bool first_use);
-
-static void reset_priority(struct threadlist *tl, pthread_priority_t pri);
-static pthread_priority_t pthread_priority_from_wq_class_index(struct workqueue *wq, int index);
-
-static void wq_unpark_continue(void* ptr, wait_result_t wait_result) __dead2;
-
-static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t ignore_constrained_thread_limit);
-
-static void workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use);
-static void workqueue_lock_spin(struct workqueue *);
-static void workqueue_unlock(struct workqueue *);
-
-static boolean_t may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass, uint32_t my_priclass, boolean_t *start_timer);
-
-static mach_vm_offset_t stack_addr_hint(proc_t p, vm_map_t vmap);
-
-int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
-int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
-
-#define WQ_MAXPRI_MIN 0 /* low prio queue num */
-#define WQ_MAXPRI_MAX 2 /* max prio queuenum */
-#define WQ_PRI_NUM 3 /* number of prio work queues */
-
#define C_32_STK_ALIGN 16
#define C_64_STK_ALIGN 16
-#define C_64_REDZONE_LEN 128
+// WORKQ uses the largest alignment any platform needs
+#define C_WORKQ_STK_ALIGN 16
+
+#if defined(__arm64__)
+/* Pull the pthread_t into the same page as the top of the stack so we dirty one less page.
+ * <rdar://problem/19941744> The _pthread struct at the top of the stack shouldn't be page-aligned
+ */
+#define PTHREAD_T_OFFSET (12*1024)
+#else
#define PTHREAD_T_OFFSET 0
+#endif
/*
- * Flags filed passed to bsdthread_create and back in pthread_start
+ * Flags field passed to bsdthread_create and back in pthread_start
31 <---------------------------------> 0
_________________________________________
| flags(8) | policy(8) | importance(16) |
-----------------------------------------
*/
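+/*
+ * For example (illustrative only, not lifted from a particular caller):
+ * a PTHREAD_START_SETSCHED create packs its parameters roughly as
+ *     PTHREAD_START_CUSTOM | PTHREAD_START_SETSCHED |
+ *         (policy << PTHREAD_START_POLICY_BITSHIFT) | importance
+ * while a PTHREAD_START_QOSCLASS create carries a pthread_priority_t
+ * encoding in the low 24 bits (PTHREAD_START_QOSCLASS_MASK), decoded below
+ * by _pthread_priority_to_policy().
+ */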
-#define PTHREAD_START_CUSTOM 0x01000000
+#define PTHREAD_START_CUSTOM 0x01000000 // <rdar://problem/34501401>
#define PTHREAD_START_SETSCHED 0x02000000
-#define PTHREAD_START_DETACHED 0x04000000
+// was PTHREAD_START_DETACHED 0x04000000
#define PTHREAD_START_QOSCLASS 0x08000000
#define PTHREAD_START_TSD_BASE_SET 0x10000000
+#define PTHREAD_START_SUSPENDED 0x20000000
#define PTHREAD_START_QOSCLASS_MASK 0x00ffffff
#define PTHREAD_START_POLICY_BITSHIFT 16
#define PTHREAD_START_POLICY_MASK 0xff
#define BASEPRI_DEFAULT 31
-#pragma mark sysctls
-
-uint32_t wq_yielded_threshold = WQ_YIELDED_THRESHOLD;
-uint32_t wq_yielded_window_usecs = WQ_YIELDED_WINDOW_USECS;
-uint32_t wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS;
-uint32_t wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS;
-uint32_t wq_max_timer_interval_usecs = WQ_MAX_TIMER_INTERVAL_USECS;
-uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS;
-uint32_t wq_max_constrained_threads = WORKQUEUE_MAXTHREADS / 8;
-uint32_t wq_max_concurrency = 1; // set to ncpus on load
-
-SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
- &wq_yielded_threshold, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
- &wq_yielded_window_usecs, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
- &wq_stalled_window_usecs, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
- &wq_reduce_pool_window_usecs, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
- &wq_max_timer_interval_usecs, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
- &wq_max_threads, 0, "");
-
-SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
- &wq_max_constrained_threads, 0, "");
-
-#ifdef DEBUG
-SYSCTL_INT(_kern, OID_AUTO, wq_max_concurrency, CTLFLAG_RW | CTLFLAG_LOCKED,
- &wq_max_concurrency, 0, "");
-
-static int wq_kevent_test SYSCTL_HANDLER_ARGS;
-SYSCTL_PROC(_debug, OID_AUTO, wq_kevent_test, CTLFLAG_MASKED | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLTYPE_OPAQUE, NULL, 0, wq_kevent_test, 0, "-");
-#endif
-
-static uint32_t wq_init_constrained_limit = 1;
-
uint32_t pthread_debug_tracing = 1;
-SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED,
- &pthread_debug_tracing, 0, "")
+static uint32_t pthread_mutex_default_policy;
+SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &pthread_mutex_default_policy, 0, "");
#pragma mark - Process/Thread Setup/Teardown syscalls
mach_vm_offset_t stackaddr;
mach_vm_offset_t aslr_offset;
bool proc64bit = proc_is64bit(p);
+ bool proc64bit_data = proc_is64bit_data(p);
// We can't safely take random values % something unless its a power-of-two
_Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");
#if defined(__i386__) || defined(__x86_64__)
+ (void)proc64bit_data;
if (proc64bit) {
// Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
aslr_offset = random() % (1 << 28); // about 512 stacks
stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
}
#elif defined(__arm__) || defined(__arm64__)
- // vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
- aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
- aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset, vm_map_page_mask(vmap));
- if (proc64bit) {
- // 64 stacks below nanomalloc (see NANOZONE_SIGNATURE)
- stackaddr = 0x170000000 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
+ user_addr_t main_thread_stack_top = 0;
+ if (pthread_kern->proc_get_user_stack) {
+ main_thread_stack_top = pthread_kern->proc_get_user_stack(p);
+ }
+ if (proc64bit && main_thread_stack_top) {
+ // The main thread stack position is randomly slid by xnu (c.f.
+ // load_main() in mach_loader.c), so basing pthread stack allocations
+ // where the main thread stack ends is already ASLRd and doing so
+ // avoids creating a gap in the process address space that may cause
+ // extra PTE memory usage. rdar://problem/33328206
+ stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top,
+ vm_map_page_mask(vmap));
} else {
- // If you try to slide down from this point, you risk ending up in memory consumed by malloc
- stackaddr = SHARED_REGION_BASE_ARM - 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
+ // vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
+ aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
+ aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset,
+ vm_map_page_mask(vmap));
+ if (proc64bit) {
+ // 64 stacks below shared region
+ stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
+ } else {
+ // If you try to slide down from this point, you risk ending up in memory consumed by malloc
+ if (proc64bit_data) {
+ stackaddr = SHARED_REGION_BASE_ARM64_32;
+ } else {
+ stackaddr = SHARED_REGION_BASE_ARM;
+ }
+
+ stackaddr -= 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
+ }
}
#else
#error Need to define a stack address hint for this architecture
return stackaddr;
}
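+
+/*
+ * Convert a pthread_priority_t into a THREAD_QOS_POLICY payload.  Fails
+ * (returns false) if the priority doesn't name a QoS class or if the
+ * relative priority falls outside [THREAD_QOS_MIN_TIER_IMPORTANCE, 0].
+ */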
+static bool
+_pthread_priority_to_policy(pthread_priority_t priority,
+ thread_qos_policy_data_t *data)
+{
+ data->qos_tier = _pthread_priority_thread_qos(priority);
+ data->tier_importance = _pthread_priority_relpri(priority);
+ if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 ||
+ data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
+ return false;
+ }
+ return true;
+}
+
/**
* bsdthread_create system call. Used by pthread_create.
*/
int
-_bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval)
+_bsdthread_create(struct proc *p,
+ __unused user_addr_t user_func, __unused user_addr_t user_funcarg,
+ user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags,
+ user_addr_t *retval)
{
kern_return_t kret;
void * sright;
int error = 0;
- int allocated = 0;
- mach_vm_offset_t stackaddr;
- mach_vm_size_t th_allocsize = 0;
- mach_vm_size_t th_guardsize;
- mach_vm_offset_t th_stack;
- mach_vm_offset_t th_pthread;
mach_vm_offset_t th_tsd_base;
mach_port_name_t th_thport;
thread_t th;
- vm_map_t vmap = pthread_kern->current_map();
task_t ctask = current_task();
unsigned int policy, importance;
uint32_t tsd_offset;
-
- int isLP64 = 0;
+ bool start_suspended = (flags & PTHREAD_START_SUSPENDED);
if (pthread_kern->proc_get_register(p) == 0) {
return EINVAL;
}
- PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0);
-
- isLP64 = proc_is64bit(p);
- th_guardsize = vm_map_page_size(vmap);
+ PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0);
- stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
kret = pthread_kern->thread_create(ctask, &th);
if (kret != KERN_SUCCESS)
return(ENOMEM);
sright = (void *)pthread_kern->convert_thread_to_port(th);
th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
+ if (!MACH_PORT_VALID(th_thport)) {
+ error = EMFILE; // userland will convert this into a crash
+ goto out;
+ }
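+ /*
+ * The kernel no longer allocates the stack or the pthread_t on the
+ * caller's behalf: user_stack and user_pthread must point at memory that
+ * libpthread already set up, so PTHREAD_START_CUSTOM is mandatory here.
+ */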
if ((flags & PTHREAD_START_CUSTOM) == 0) {
- mach_vm_size_t pthread_size =
- vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(vmap));
- th_allocsize = th_guardsize + user_stack + pthread_size;
- user_stack += PTHREAD_T_OFFSET;
-
- kret = mach_vm_map(vmap, &stackaddr,
- th_allocsize,
- page_size-1,
- VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
- 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
- VM_INHERIT_DEFAULT);
- if (kret != KERN_SUCCESS){
- kret = mach_vm_allocate(vmap,
- &stackaddr, th_allocsize,
- VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
- }
- if (kret != KERN_SUCCESS) {
- error = ENOMEM;
- goto out;
- }
-
- PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);
-
- allocated = 1;
- /*
- * The guard page is at the lowest address
- * The stack base is the highest address
- */
- kret = mach_vm_protect(vmap, stackaddr, th_guardsize, FALSE, VM_PROT_NONE);
-
- if (kret != KERN_SUCCESS) {
- error = ENOMEM;
- goto out1;
- }
-
- th_pthread = stackaddr + th_guardsize + user_stack;
- th_stack = th_pthread;
-
- /*
- * Pre-fault the first page of the new thread's stack and the page that will
- * contain the pthread_t structure.
- */
- if (vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
- vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap))){
- vm_fault( vmap,
- vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
- VM_PROT_READ | VM_PROT_WRITE,
- FALSE,
- THREAD_UNINT, NULL, 0);
- }
-
- vm_fault( vmap,
- vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap)),
- VM_PROT_READ | VM_PROT_WRITE,
- FALSE,
- THREAD_UNINT, NULL, 0);
-
- } else {
- th_stack = user_stack;
- th_pthread = user_pthread;
-
- PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0);
+ error = EINVAL;
+ goto out;
}
+ PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3);
+
tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
if (tsd_offset) {
- th_tsd_base = th_pthread + tsd_offset;
+ th_tsd_base = user_pthread + tsd_offset;
kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
if (kret == KERN_SUCCESS) {
flags |= PTHREAD_START_TSD_BASE_SET;
}
}
+ /*
+ * Strip PTHREAD_START_SUSPENDED so that libpthread can observe that the
+ * kernel supports this flag (after the fact).
+ */
+ flags &= ~PTHREAD_START_SUSPENDED;
-#if defined(__i386__) || defined(__x86_64__)
/*
- * Set up i386 registers & function call.
+ * Set up registers & function call.
*/
- if (isLP64 == 0) {
- x86_thread_state32_t state = {
- .eip = (unsigned int)pthread_kern->proc_get_threadstart(p),
- .eax = (unsigned int)th_pthread,
- .ebx = (unsigned int)th_thport,
- .ecx = (unsigned int)user_func,
- .edx = (unsigned int)user_funcarg,
- .edi = (unsigned int)user_stack,
- .esi = (unsigned int)flags,
- /*
- * set stack pointer
- */
- .esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN))
+#if defined(__i386__) || defined(__x86_64__)
+ if (proc_is64bit_data(p)) {
+ x86_thread_state64_t state = {
+ .rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
+ .rdi = (uint64_t)user_pthread,
+ .rsi = (uint64_t)th_thport,
+ .rdx = (uint64_t)user_func, /* golang wants this */
+ .rcx = (uint64_t)user_funcarg, /* golang wants this */
+ .r8 = (uint64_t)user_stack, /* golang wants this */
+ .r9 = (uint64_t)flags,
+
+ .rsp = (uint64_t)user_stack,
};
- error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
- if (error != KERN_SUCCESS) {
- error = EINVAL;
- goto out;
- }
+ (void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
} else {
- x86_thread_state64_t state64 = {
- .rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
- .rdi = (uint64_t)th_pthread,
- .rsi = (uint64_t)(th_thport),
- .rdx = (uint64_t)user_func,
- .rcx = (uint64_t)user_funcarg,
- .r8 = (uint64_t)user_stack,
- .r9 = (uint64_t)flags,
- /*
- * set stack pointer aligned to 16 byte boundary
- */
- .rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN)
+ x86_thread_state32_t state = {
+ .eip = (uint32_t)pthread_kern->proc_get_threadstart(p),
+ .eax = (uint32_t)user_pthread,
+ .ebx = (uint32_t)th_thport,
+ .ecx = (uint32_t)user_func, /* golang wants this */
+ .edx = (uint32_t)user_funcarg, /* golang wants this */
+ .edi = (uint32_t)user_stack, /* golang wants this */
+ .esi = (uint32_t)flags,
+
+ .esp = (uint32_t)user_stack,
};
- error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
- if (error != KERN_SUCCESS) {
- error = EINVAL;
- goto out;
- }
-
+ (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
}
-#elif defined(__arm__)
- arm_thread_state_t state = {
- .pc = (int)pthread_kern->proc_get_threadstart(p),
- .r[0] = (unsigned int)th_pthread,
- .r[1] = (unsigned int)th_thport,
- .r[2] = (unsigned int)user_func,
- .r[3] = (unsigned int)user_funcarg,
- .r[4] = (unsigned int)user_stack,
- .r[5] = (unsigned int)flags,
-
- /* Set r7 & lr to 0 for better back tracing */
- .r[7] = 0,
- .lr = 0,
-
- /*
- * set stack pointer
- */
- .sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN))
- };
+#elif defined(__arm__) || defined(__arm64__)
+ if (proc_is64bit_data(p)) {
+#ifdef __arm64__
+ arm_thread_state64_t state = {
+ .pc = (uint64_t)pthread_kern->proc_get_threadstart(p),
+ .x[0] = (uint64_t)user_pthread,
+ .x[1] = (uint64_t)th_thport,
+ .x[2] = (uint64_t)user_func, /* golang wants this */
+ .x[3] = (uint64_t)user_funcarg, /* golang wants this */
+ .x[4] = (uint64_t)user_stack, /* golang wants this */
+ .x[5] = (uint64_t)flags,
+
+ .sp = (uint64_t)user_stack,
+ };
- (void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
+ (void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
+#else
+ panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
+#endif // defined(__arm64__)
+ } else {
+ arm_thread_state_t state = {
+ .pc = (uint32_t)pthread_kern->proc_get_threadstart(p),
+ .r[0] = (uint32_t)user_pthread,
+ .r[1] = (uint32_t)th_thport,
+ .r[2] = (uint32_t)user_func, /* golang wants this */
+ .r[3] = (uint32_t)user_funcarg, /* golang wants this */
+ .r[4] = (uint32_t)user_stack, /* golang wants this */
+ .r[5] = (uint32_t)flags,
+
+ .sp = (uint32_t)user_stack,
+ };
+ (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
+ }
#else
#error bsdthread_create not defined for this architecture
#endif
- if ((flags & PTHREAD_START_SETSCHED) != 0) {
+ if (flags & PTHREAD_START_SETSCHED) {
/* Set scheduling parameters if needed */
thread_extended_policy_data_t extinfo;
thread_precedence_policy_data_t precedinfo;
precedinfo.importance = (importance - BASEPRI_DEFAULT);
thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
- } else if ((flags & PTHREAD_START_QOSCLASS) != 0) {
+ } else if (flags & PTHREAD_START_QOSCLASS) {
/* Set thread QoS class if requested. */
- pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK);
-
thread_qos_policy_data_t qos;
- qos.qos_tier = pthread_priority_get_thread_qos(priority);
- qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 :
- _pthread_priority_get_relpri(priority);
- pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
+ if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) {
+ error = EINVAL;
+ goto out;
+ }
+ pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY,
+ (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
+ }
+
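+ /*
+ * Pre-seed the new pthread's TSD slot for its cached mach-thread-self port
+ * with the port name we just copied out, so the thread can find its own
+ * port without calling mach_thread_self() first.
+ */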
+ if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
+ uint64_t mach_thread_self_offset =
+ pthread_kern->proc_get_mach_thread_self_tsd_offset(p);
+ if (mach_thread_self_offset && tsd_offset) {
+ bool proc64bit = proc_is64bit(p);
+ if (proc64bit) {
+ uint64_t th_thport_tsd = (uint64_t)th_thport;
+ error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
+ mach_thread_self_offset, sizeof(th_thport_tsd));
+ } else {
+ uint32_t th_thport_tsd = (uint32_t)th_thport;
+ error = copyout(&th_thport_tsd, user_pthread + tsd_offset +
+ mach_thread_self_offset, sizeof(th_thport_tsd));
+ }
+ if (error) {
+ goto out;
+ }
+ }
}
- kret = pthread_kern->thread_resume(th);
- if (kret != KERN_SUCCESS) {
- error = EINVAL;
- goto out1;
+ if (!start_suspended) {
+ kret = pthread_kern->thread_resume(th);
+ if (kret != KERN_SUCCESS) {
+ error = EINVAL;
+ goto out;
+ }
}
thread_deallocate(th); /* drop the creator reference */
- PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0);
-
- // cast required as mach_vm_offset_t is always 64 bits even on 32-bit platforms
- *retval = (user_addr_t)th_pthread;
+ PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_END, error, user_pthread, 0, 0);
+ *retval = user_pthread;
return(0);
-out1:
- if (allocated != 0) {
- (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
- }
out:
(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
+ if (pthread_kern->thread_will_park_or_terminate) {
+ pthread_kern->thread_will_park_or_terminate(th);
+ }
(void)thread_terminate(th);
(void)thread_deallocate(th);
return(error);
freeaddr = (mach_vm_offset_t)stackaddr;
freesize = size;
- PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);
+ PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff);
if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){
vm_map_t user_map = pthread_kern->current_map();
freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
- assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
+#if MACH_ASSERT
+ if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) {
+ os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret);
+ }
+#endif
kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
} else {
kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
if (kret != KERN_SUCCESS) {
- PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
- return(EINVAL);
+ PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
}
}
}
-
- (void) thread_terminate(th);
+
+ if (pthread_kern->thread_will_park_or_terminate) {
+ pthread_kern->thread_will_park_or_terminate(th);
+ }
+ (void)thread_terminate(th);
if (sem != MACH_PORT_NULL) {
- kret = pthread_kern->semaphore_signal_internal_trap(sem);
+ kret = pthread_kern->semaphore_signal_internal_trap(sem);
if (kret != KERN_SUCCESS) {
- PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
- return(EINVAL);
+ PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0);
}
}
-
+
if (kthport != MACH_PORT_NULL) {
pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
}
- PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0);
+ PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0);
pthread_kern->thread_exception_return();
- panic("bsdthread_terminate: still running\n");
-
- PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0);
-
- return(0);
+ __builtin_unreachable();
}
/**
uint64_t dispatchqueue_offset,
int32_t *retval)
{
- /* We have to do this first so that it resets after fork */
- pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stack_addr_hint(p, pthread_kern->current_map()));
+ struct _pthread_registration_data data = {};
+ uint32_t max_tsd_offset;
+ kern_return_t kr;
+ size_t pthread_init_sz = 0;
- /* prevent multiple registrations */
- if (pthread_kern->proc_get_register(p) != 0) {
- return(EINVAL);
- }
/* syscall randomizer test can pass bogus values */
if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
return(EINVAL);
}
+ /*
+ * If we have pthread_init_data, then we use that (and target_concptr,
+ * which is an offset) to get the data.
+ */
+ if (pthread_init_data != 0) {
+ if (pthread_init_data_size < sizeof(data.version)) {
+ return EINVAL;
+ }
+ pthread_init_sz = MIN(sizeof(data), (size_t)pthread_init_data_size);
+ int ret = copyin(pthread_init_data, &data, pthread_init_sz);
+ if (ret) {
+ return ret;
+ }
+ if (data.version != (size_t)pthread_init_data_size) {
+ return EINVAL;
+ }
+ } else {
+ data.dispatch_queue_offset = dispatchqueue_offset;
+ }
+
+ /* We have to do this before proc_get_register so that it resets after fork */
+ mach_vm_offset_t stackaddr = stack_addr_hint(p, pthread_kern->current_map());
+ pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stackaddr);
+
+ /* prevent multiple registrations */
+ if (pthread_kern->proc_get_register(p) != 0) {
+ return(EINVAL);
+ }
+
pthread_kern->proc_set_threadstart(p, threadstart);
pthread_kern->proc_set_wqthread(p, wqthread);
pthread_kern->proc_set_pthsize(p, pthsize);
pthread_kern->proc_set_register(p);
- /* if we have pthread_init_data, then we use that and target_concptr (which is an offset) get data. */
- if (pthread_init_data != 0) {
- thread_qos_policy_data_t qos;
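+ /*
+ * Validate the TSD-relative offsets supplied by userspace: each one must
+ * leave room for a full pointer-sized slot inside the pthread_t, otherwise
+ * it is ignored (treated as 0).
+ */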
+ uint32_t tsd_slot_sz = proc_is64bit(p) ? sizeof(uint64_t) : sizeof(uint32_t);
+ if ((uint32_t)pthsize >= tsd_slot_sz &&
+ data.tsd_offset <= (uint32_t)(pthsize - tsd_slot_sz)) {
+ max_tsd_offset = ((uint32_t)pthsize - data.tsd_offset - tsd_slot_sz);
+ } else {
+ data.tsd_offset = 0;
+ max_tsd_offset = 0;
+ }
+ pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);
- struct _pthread_registration_data data = {};
- size_t pthread_init_sz = MIN(sizeof(struct _pthread_registration_data), (size_t)pthread_init_data_size);
+ if (data.dispatch_queue_offset > max_tsd_offset) {
+ data.dispatch_queue_offset = 0;
+ }
+ pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);
- kern_return_t kr = copyin(pthread_init_data, &data, pthread_init_sz);
- if (kr != KERN_SUCCESS) {
- return EINVAL;
+ if (pthread_kern->proc_set_return_to_kernel_offset) {
+ if (data.return_to_kernel_offset > max_tsd_offset) {
+ data.return_to_kernel_offset = 0;
}
+ pthread_kern->proc_set_return_to_kernel_offset(p,
+ data.return_to_kernel_offset);
+ }
- /* Incoming data from the data structure */
- pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);
- if (data.version > offsetof(struct _pthread_registration_data, tsd_offset)
- && data.tsd_offset < (uint32_t)pthsize) {
- pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);
+ if (pthread_kern->proc_set_mach_thread_self_tsd_offset) {
+ if (data.mach_thread_self_offset > max_tsd_offset) {
+ data.mach_thread_self_offset = 0;
}
+ pthread_kern->proc_set_mach_thread_self_tsd_offset(p,
+ data.mach_thread_self_offset);
+ }
+ if (pthread_init_data != 0) {
/* Outgoing data that userspace expects as a reply */
data.version = sizeof(struct _pthread_registration_data);
+ data.main_qos = _pthread_unspecified_priority();
+
if (pthread_kern->qos_main_thread_active()) {
mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
+ thread_qos_policy_data_t qos;
boolean_t gd = FALSE;
- kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
+ kr = pthread_kern->thread_policy_get(current_thread(),
+ THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
- /* Unspecified threads means the kernel wants us to impose legacy upon the thread. */
+ /*
+ * Unspecified threads means the kernel wants us
+ * to impose legacy upon the thread.
+ */
qos.qos_tier = THREAD_QOS_LEGACY;
qos.tier_importance = 0;
- kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
+ kr = pthread_kern->thread_policy_set_internal(current_thread(),
+ THREAD_QOS_POLICY, (thread_policy_t)&qos,
+ THREAD_QOS_POLICY_COUNT);
}
if (kr == KERN_SUCCESS) {
- data.main_qos = thread_qos_get_pthread_priority(qos.qos_tier);
- } else {
- data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
+ data.main_qos = _pthread_priority_make_from_thread_qos(
+ qos.qos_tier, 0, 0);
}
- } else {
- data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
}
+ data.stack_addr_hint = stackaddr;
+ data.mutex_default_policy = pthread_mutex_default_policy;
+
kr = copyout(&data, pthread_init_data, pthread_init_sz);
if (kr != KERN_SUCCESS) {
return EINVAL;
}
- } else {
- pthread_kern->proc_set_dispatchqueue_offset(p, dispatchqueue_offset);
}
/* return the supported feature set as the return value. */
return(0);
}
-#pragma mark - QoS Manipulation
+
+#pragma mark - Workqueue Thread Support
+
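+/*
+ * Size of a workqueue thread allocation: one guard page, the default stack,
+ * and the page-rounded pthread_t area (including PTHREAD_T_OFFSET).
+ */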
+static mach_vm_size_t
+workq_thread_allocsize(proc_t p, vm_map_t wq_map,
+ mach_vm_size_t *guardsize_out)
+{
+ mach_vm_size_t guardsize = vm_map_page_size(wq_map);
+ mach_vm_size_t pthread_size = vm_map_round_page_mask(
+ pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET,
+ vm_map_page_mask(wq_map));
+ if (guardsize_out) *guardsize_out = guardsize;
+ return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
+}
int
-_bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval)
+workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr)
{
- kern_return_t kr;
- thread_t th;
+ mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
+ mach_vm_size_t guardsize, th_allocsize;
+ kern_return_t kret;
- pthread_priority_t priority;
+ th_allocsize = workq_thread_allocsize(p, vmap, &guardsize);
+ kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1,
+ VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
- /* Unused parameters must be zero. */
- if (arg3 != 0) {
- return EINVAL;
+ if (kret != KERN_SUCCESS) {
+ kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize,
+ VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
}
- /* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */
- if (proc_is64bit(p)) {
- uint64_t v;
- kr = copyin(tsd_priority_addr, &v, sizeof(v));
- if (kr != KERN_SUCCESS) {
- return kr;
- }
- priority = (int)(v & 0xffffffff);
- } else {
- uint32_t v;
- kr = copyin(tsd_priority_addr, &v, sizeof(v));
- if (kr != KERN_SUCCESS) {
- return kr;
- }
- priority = v;
+ if (kret != KERN_SUCCESS) {
+ goto fail;
}
- if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
- return ESRCH;
+ /*
+ * The guard page is at the lowest address
+ * The stack base is the highest address
+ */
+ kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE);
+ if (kret != KERN_SUCCESS) {
+ goto fail_vm_deallocate;
}
- /* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */
- if (th != current_thread()) {
- thread_deallocate(th);
- return EPERM;
+ if (out_addr) {
+ *out_addr = stackaddr;
}
+ return 0;
- int rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval);
-
- /* Static param the thread, we just set QoS on it, so its stuck in QoS land now. */
- /* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744>, for details
-
- thread_deallocate(th);
-
- return rv;
+fail_vm_deallocate:
+ (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
+fail:
+ return kret;
}
-static inline struct threadlist *
-util_get_thread_threadlist_entry(thread_t th)
+int
+workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr)
{
- struct uthread *uth = pthread_kern->get_bsdthread_info(th);
- if (uth) {
- struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
- return tl;
+ return mach_vm_deallocate(vmap, stackaddr,
+ workq_thread_allocsize(p, vmap, NULL));
+}
+
+void
+workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th,
+ vm_map_t vmap, user_addr_t stackaddr)
+{
+ // Keep this in sync with workq_setup_thread()
+ const vm_size_t guardsize = vm_map_page_size(vmap);
+ const user_addr_t freeaddr = (user_addr_t)stackaddr + guardsize;
+ const vm_map_offset_t freesize = vm_map_trunc_page_mask(
+ (PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1,
+ vm_map_page_mask(vmap)) - guardsize;
+
+ __assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr,
+ freesize, VM_BEHAVIOR_REUSABLE);
+#if MACH_ASSERT
+ if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
+ os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
}
- return NULL;
+#endif
}
-int
-_bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval)
-{
- thread_qos_policy_data_t qos;
- mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
- boolean_t gd = FALSE;
- bool was_manager_thread = false;
- thread_t th = current_thread();
- struct workqueue *wq = NULL;
- struct threadlist *tl = NULL;
+struct workq_thread_addrs {
+ user_addr_t self;
+ user_addr_t stack_bottom;
+ user_addr_t stack_top;
+};
- kern_return_t kr;
- int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0;
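+/*
+ * C_WORKQ_STK_ALIGN is a power of two, so masking the address with its
+ * negation rounds the stack top down to the required alignment.
+ */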
+static inline void
+workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr)
+{
+ th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN);
+}
- if ((flags & _PTHREAD_SET_SELF_WQ_KEVENT_UNBIND) != 0) {
- tl = util_get_thread_threadlist_entry(th);
- if (tl) {
- wq = tl->th_workq;
- } else {
- goto qos;
- }
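+/*
+ * Given the base of a workqueue stack allocation, compute the thread's
+ * pthread_t address (which doubles as the initial stack top) and the lowest
+ * usable stack address, just above the guard page.
+ */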
+static void
+workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr,
+ struct workq_thread_addrs *th_addrs)
+{
+ const vm_size_t guardsize = vm_map_page_size(map);
- workqueue_lock_spin(wq);
- if (tl->th_flags & TH_LIST_KEVENT_BOUND) {
- tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
- unsigned int kevent_flags = KEVENT_FLAG_WORKQ;
- if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
- kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER;
- }
+ th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE +
+ guardsize + PTHREAD_T_OFFSET);
+ workq_thread_set_top_addr(th_addrs, th_addrs->self);
+ th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize);
+}
- workqueue_unlock(wq);
- kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags);
- } else {
- workqueue_unlock(wq);
- }
+static inline void
+workq_set_register_state(proc_t p, thread_t th,
+ struct workq_thread_addrs *addrs, mach_port_name_t kport,
+ user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count)
+{
+ user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
+ if (!wqstart_fnptr) {
+ panic("workqueue thread start function pointer is NULL");
}
-qos:
- if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
- kr = pthread_kern->thread_policy_get(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
- if (kr != KERN_SUCCESS) {
- qos_rv = EINVAL;
- goto voucher;
- }
+#if defined(__i386__) || defined(__x86_64__)
+ if (proc_is64bit_data(p) == 0) {
+ x86_thread_state32_t state = {
+ .eip = (unsigned int)wqstart_fnptr,
+ .eax = /* arg0 */ (unsigned int)addrs->self,
+ .ebx = /* arg1 */ (unsigned int)kport,
+ .ecx = /* arg2 */ (unsigned int)addrs->stack_bottom,
+ .edx = /* arg3 */ (unsigned int)kevent_list,
+ .edi = /* arg4 */ (unsigned int)upcall_flags,
+ .esi = /* arg5 */ (unsigned int)kevent_count,
- /* If we have main-thread QoS then we don't allow a thread to come out of QOS_CLASS_UNSPECIFIED. */
- if (pthread_kern->qos_main_thread_active() && qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
- qos_rv = EPERM;
- goto voucher;
- }
+ .esp = (int)((vm_offset_t)addrs->stack_top),
+ };
- /* Get the work queue for tracing, also the threadlist for bucket manipluation. */
- if (!tl) {
- tl = util_get_thread_threadlist_entry(th);
- if (tl) wq = tl->th_workq;
+ int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
+ if (error != KERN_SUCCESS) {
+ panic(__func__ ": thread_set_wq_state failed: %d", error);
}
+ } else {
+ x86_thread_state64_t state64 = {
+ // x86-64 already passes all the arguments in registers, so we just put them in their final place here
+ .rip = (uint64_t)wqstart_fnptr,
+ .rdi = (uint64_t)addrs->self,
+ .rsi = (uint64_t)kport,
+ .rdx = (uint64_t)addrs->stack_bottom,
+ .rcx = (uint64_t)kevent_list,
+ .r8 = (uint64_t)upcall_flags,
+ .r9 = (uint64_t)kevent_count,
- PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0);
+ .rsp = (uint64_t)(addrs->stack_top)
+ };
- qos.qos_tier = pthread_priority_get_thread_qos(priority);
- qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority);
+ int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
+ if (error != KERN_SUCCESS) {
+ panic(__func__ ": thread_set_wq_state failed: %d", error);
+ }
+ }
+#elif defined(__arm__) || defined(__arm64__)
+ if (!proc_is64bit_data(p)) {
+ arm_thread_state_t state = {
+ .pc = (int)wqstart_fnptr,
+ .r[0] = (unsigned int)addrs->self,
+ .r[1] = (unsigned int)kport,
+ .r[2] = (unsigned int)addrs->stack_bottom,
+ .r[3] = (unsigned int)kevent_list,
+ // will be pushed onto the stack as arg4/5
+ .r[4] = (unsigned int)upcall_flags,
+ .r[5] = (unsigned int)kevent_count,
+
+ .sp = (int)(addrs->stack_top)
+ };
- if (qos.qos_tier == QOS_CLASS_UNSPECIFIED) {
- qos_rv = EINVAL;
- goto voucher;
+ int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
+ if (error != KERN_SUCCESS) {
+ panic(__func__ ": thread_set_wq_state failed: %d", error);
}
+ } else {
+#if defined(__arm64__)
+ arm_thread_state64_t state = {
+ .pc = (uint64_t)wqstart_fnptr,
+ .x[0] = (uint64_t)addrs->self,
+ .x[1] = (uint64_t)kport,
+ .x[2] = (uint64_t)addrs->stack_bottom,
+ .x[3] = (uint64_t)kevent_list,
+ .x[4] = (uint64_t)upcall_flags,
+ .x[5] = (uint64_t)kevent_count,
+
+ .sp = (uint64_t)((vm_offset_t)addrs->stack_top),
+ };
- /* If we're a workqueue, the threadlist item priority needs adjusting, along with the bucket we were running in. */
- if (tl) {
- workqueue_lock_spin(wq);
- bool now_under_constrained_limit = false;
+ int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
+ if (error != KERN_SUCCESS) {
+ panic(__func__ ": thread_set_wq_state failed: %d", error);
+ }
+#else /* defined(__arm64__) */
+ panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
+#endif /* defined(__arm64__) */
+ }
+#else
+#error setup_wqthread not defined for this architecture
+#endif
+}
- assert(!(tl->th_flags & TH_LIST_KEVENT_BOUND));
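+/*
+ * Pull pending kevents for a workqueue thread.  The output kevent list and
+ * its data buffer are carved out of the region just below the thread's
+ * pthread_t, so they can be handed to userspace without a separate
+ * allocation.
+ */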
+static inline int
+workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs,
+ user_addr_t eventlist, int nevents, int kevent_flags,
+ user_addr_t *kevent_list_out, int *kevent_count_out)
+{
+ int ret;
- kr = pthread_kern->thread_set_workq_qos(th, qos.qos_tier, qos.tier_importance);
- assert(kr == KERN_SUCCESS || kr == KERN_TERMINATED);
+ user_addr_t kevent_list = th_addrs->self -
+ WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
+ user_addr_t data_buf = kevent_list - WQ_KEVENT_DATA_SIZE;
+ user_size_t data_available = WQ_KEVENT_DATA_SIZE;
- /* Fix up counters. */
- uint8_t old_bucket = tl->th_priority;
- uint8_t new_bucket = pthread_priority_get_class_index(priority);
- if (old_bucket == WORKQUEUE_EVENT_MANAGER_BUCKET) {
- was_manager_thread = true;
- }
+ ret = pthread_kern->kevent_workq_internal(p, eventlist, nevents,
+ kevent_list, WQ_KEVENT_LIST_LEN,
+ data_buf, &data_available,
+ kevent_flags, kevent_count_out);
- uint32_t old_active = OSAddAtomic(-1, &wq->wq_thactive_count[old_bucket]);
- OSAddAtomic(1, &wq->wq_thactive_count[new_bucket]);
-
- wq->wq_thscheduled_count[old_bucket]--;
- wq->wq_thscheduled_count[new_bucket]++;
-
- bool old_overcommit = !(tl->th_flags & TH_LIST_CONSTRAINED);
- bool new_overcommit = priority & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
- if (!old_overcommit && new_overcommit) {
- wq->wq_constrained_threads_scheduled--;
- tl->th_flags &= ~TH_LIST_CONSTRAINED;
- if (wq->wq_constrained_threads_scheduled == wq_max_constrained_threads - 1) {
- now_under_constrained_limit = true;
- }
- } else if (old_overcommit && !new_overcommit) {
- wq->wq_constrained_threads_scheduled++;
- tl->th_flags |= TH_LIST_CONSTRAINED;
- }
+ // squash any errors into just empty output
+ if (ret != 0 || *kevent_count_out == -1) {
+ *kevent_list_out = NULL;
+ *kevent_count_out = 0;
+ return ret;
+ }
- tl->th_priority = new_bucket;
-
- /* If we were at the ceiling of threads for a given bucket, we have
- * to reevaluate whether we should start more work.
- */
- if (old_active == wq->wq_reqconc[old_bucket] || now_under_constrained_limit) {
- /* workqueue_run_nextreq will drop the workqueue lock in all exit paths. */
- (void)workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_DEFAULT, 0, false);
- } else {
- workqueue_unlock(wq);
- }
- } else {
- kr = pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
- if (kr != KERN_SUCCESS) {
- qos_rv = EINVAL;
- }
- }
-
- PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0);
- }
-
-voucher:
- if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) {
- kr = pthread_kern->thread_set_voucher_name(voucher);
- if (kr != KERN_SUCCESS) {
- voucher_rv = ENOENT;
- goto fixedpri;
- }
- }
-
-fixedpri:
- if (qos_rv) goto done;
- if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) {
- thread_extended_policy_data_t extpol = {.timeshare = 0};
-
- if (!tl) tl = util_get_thread_threadlist_entry(th);
- if (tl) {
- /* Not allowed on workqueue threads */
- fixedpri_rv = ENOTSUP;
- goto done;
- }
-
- kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
- if (kr != KERN_SUCCESS) {
- fixedpri_rv = EINVAL;
- goto done;
- }
- } else if ((flags & _PTHREAD_SET_SELF_TIMESHARE_FLAG) != 0) {
- thread_extended_policy_data_t extpol = {.timeshare = 1};
-
- if (!tl) tl = util_get_thread_threadlist_entry(th);
- if (tl) {
- /* Not allowed on workqueue threads */
- fixedpri_rv = ENOTSUP;
- goto done;
- }
-
- kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
- if (kr != KERN_SUCCESS) {
- fixedpri_rv = EINVAL;
- goto done;
- }
- }
-
-done:
- if (qos_rv && voucher_rv) {
- /* Both failed, give that a unique error. */
- return EBADMSG;
- }
-
- if (qos_rv) {
- return qos_rv;
- }
-
- if (voucher_rv) {
- return voucher_rv;
- }
-
- if (fixedpri_rv) {
- return fixedpri_rv;
- }
-
- return 0;
-}
-
-int
-_bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
-{
- thread_t th;
- int rv = 0;
-
- if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
- return ESRCH;
- }
-
- int override_qos = pthread_priority_get_thread_qos(priority);
-
- struct threadlist *tl = util_get_thread_threadlist_entry(th);
- if (tl) {
- PTHREAD_TRACE_WQ(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);
- }
-
- /* The only failure case here is if we pass a tid and have it lookup the thread, we pass the uthread, so this all always succeeds. */
- pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE,
- resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE, USER_ADDR_NULL, MACH_PORT_NULL);
- thread_deallocate(th);
- return rv;
-}
-
-int
-_bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t resource, user_addr_t arg3, int __unused *retval)
-{
- thread_t th;
- int rv = 0;
-
- if (arg3 != 0) {
- return EINVAL;
- }
-
- if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
- return ESRCH;
- }
-
- struct uthread *uth = pthread_kern->get_bsdthread_info(th);
-
- struct threadlist *tl = util_get_thread_threadlist_entry(th);
- if (tl) {
- PTHREAD_TRACE_WQ(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 0, 0, 0);
- }
-
- pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE);
-
- thread_deallocate(th);
- return rv;
-}
-
-static int
-_bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, user_addr_t ulock_addr)
-{
- thread_t th;
- int rv = 0;
-
- if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
- return ESRCH;
- }
-
- int override_qos = pthread_priority_get_thread_qos(priority);
-
- struct threadlist *tl = util_get_thread_threadlist_entry(th);
- if (!tl) {
- thread_deallocate(th);
- return EPERM;
- }
-
- PTHREAD_TRACE_WQ(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);
-
- rv = pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE,
- resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE, ulock_addr, kport);
-
- thread_deallocate(th);
- return rv;
-}
-
-int _bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused *p, user_addr_t __unused cmd,
- mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
-{
- return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, resource, USER_ADDR_NULL);
-}
-
-int
-_bsdthread_ctl_qos_override_dispatch(struct proc *p __unused, user_addr_t cmd __unused, mach_port_name_t kport, pthread_priority_t priority, user_addr_t ulock_addr, int __unused *retval)
-{
- return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, USER_ADDR_NULL, ulock_addr);
-}
-
-int
-_bsdthread_ctl_qos_override_reset(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
-{
- if (arg1 != 0 || arg2 != 0 || arg3 != 0) {
- return EINVAL;
- }
-
- return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, 1 /* reset_all */, 0, 0, retval);
-}
-
-int
-_bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused *p, user_addr_t __unused cmd, int reset_all, user_addr_t resource, user_addr_t arg3, int __unused *retval)
-{
- if ((reset_all && (resource != 0)) || arg3 != 0) {
- return EINVAL;
- }
-
- thread_t th = current_thread();
- struct uthread *uth = pthread_kern->get_bsdthread_info(th);
- struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
-
- if (!tl) {
- return EPERM;
- }
-
- PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, 0, 0, 0, 0);
-
- resource = reset_all ? THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD : resource;
- pthread_kern->proc_usynch_thread_qos_reset_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
-
- return 0;
-}
-
-int
-_bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
-{
- switch (cmd) {
- case BSDTHREAD_CTL_SET_QOS:
- return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
- case BSDTHREAD_CTL_QOS_OVERRIDE_START:
- return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
- case BSDTHREAD_CTL_QOS_OVERRIDE_END:
- return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
- case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
- return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval);
- case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
- return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
- case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD:
- return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
- case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET:
- return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, (int)arg1, arg2, arg3, retval);
- case BSDTHREAD_CTL_SET_SELF:
- return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval);
- default:
- return EINVAL;
- }
-}
-
-#pragma mark - Workqueue Implementation
-#pragma mark workqueue lock
-
-static boolean_t workqueue_lock_spin_is_acquired_kdp(struct workqueue *wq) {
- return kdp_lck_spin_is_acquired(&wq->wq_lock);
-}
-
-static void
-workqueue_lock_spin(struct workqueue *wq)
-{
- boolean_t interrupt_state = ml_set_interrupts_enabled(FALSE);
- lck_spin_lock(&wq->wq_lock);
- wq->wq_interrupt_state = interrupt_state;
-}
-
-static void
-workqueue_unlock(struct workqueue *wq)
-{
- boolean_t interrupt_state = wq->wq_interrupt_state;
- lck_spin_unlock(&wq->wq_lock);
- ml_set_interrupts_enabled(interrupt_state);
-}
-
-#pragma mark workqueue add timer
-
-/**
- * Sets up the timer which will call out to workqueue_add_timer
- */
-static void
-workqueue_interval_timer_start(struct workqueue *wq)
-{
- uint64_t deadline;
-
- /* n.b. wq_timer_interval is reset to 0 in workqueue_add_timer if the
- ATIMER_RUNNING flag is not present. The net effect here is that if a
- sequence of threads is required, we'll double the time before we give out
- the next one. */
- if (wq->wq_timer_interval == 0) {
- wq->wq_timer_interval = wq_stalled_window_usecs;
-
- } else {
- wq->wq_timer_interval = wq->wq_timer_interval * 2;
-
- if (wq->wq_timer_interval > wq_max_timer_interval_usecs) {
- wq->wq_timer_interval = wq_max_timer_interval_usecs;
- }
- }
- clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);
-
- PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, wq->wq_flags, wq->wq_timer_interval, 0);
-
- boolean_t ret = thread_call_enter1_delayed(wq->wq_atimer_delayed_call, wq->wq_atimer_delayed_call, deadline);
- if (ret) {
- panic("delayed_call was already enqueued");
- }
-}
-
-/**
- * Immediately trigger the workqueue_add_timer
- */
-static void
-workqueue_interval_timer_trigger(struct workqueue *wq)
-{
- PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, wq->wq_flags, 0, 0);
-
- boolean_t ret = thread_call_enter1(wq->wq_atimer_immediate_call, wq->wq_atimer_immediate_call);
- if (ret) {
- panic("immediate_call was already enqueued");
- }
-}
-
-/**
- * returns whether lastblocked_tsp is within wq_stalled_window_usecs of cur_ts
- */
-static boolean_t
-wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)
-{
- clock_sec_t secs;
- clock_usec_t usecs;
- uint64_t lastblocked_ts;
- uint64_t elapsed;
-
- /*
- * the timestamp is updated atomically w/o holding the workqueue lock
- * so we need to do an atomic read of the 64 bits so that we don't see
- * a mismatched pair of 32 bit reads... we accomplish this in an architecturally
- * independent fashion by using OSCompareAndSwap64 to write back the
- * value we grabbed... if it succeeds, then we have a good timestamp to
- * evaluate... if it fails, we straddled grabbing the timestamp while it
- * was being updated... treat a failed update as a busy thread since
- * it implies we are about to see a really fresh timestamp anyway
- */
- lastblocked_ts = *lastblocked_tsp;
-
- if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp))
- return (TRUE);
-
- if (lastblocked_ts >= cur_ts) {
- /*
- * because the update of the timestamp when a thread blocks isn't
- * serialized against us looking at it (i.e. we don't hold the workq lock)
- * it's possible to have a timestamp that matches the current time or
- * that even looks to be in the future relative to when we grabbed the current
- * time... just treat this as a busy thread since it must have just blocked.
- */
- return (TRUE);
- }
- elapsed = cur_ts - lastblocked_ts;
-
- pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);
-
- if (secs == 0 && usecs < wq_stalled_window_usecs)
- return (TRUE);
- return (FALSE);
-}
-
-static inline bool
-WQ_TIMER_DELAYED_NEEDED(struct workqueue *wq)
-{
- int oldflags;
-retry:
- oldflags = wq->wq_flags;
- if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_DELAYED_RUNNING))) {
- if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_DELAYED_RUNNING, (UInt32 *)&wq->wq_flags)) {
- return true;
- } else {
- goto retry;
- }
- }
- return false;
-}
-
-static inline bool
-WQ_TIMER_IMMEDIATE_NEEDED(struct workqueue *wq)
-{
- int oldflags;
-retry:
- oldflags = wq->wq_flags;
- if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_IMMEDIATE_RUNNING))) {
- if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_IMMEDIATE_RUNNING, (UInt32 *)&wq->wq_flags)) {
- return true;
- } else {
- goto retry;
- }
- }
- return false;
-}
-
-/**
- * handler function for the timer
- */
-static void
-workqueue_add_timer(struct workqueue *wq, thread_call_t thread_call_self)
-{
- proc_t p;
- boolean_t start_timer = FALSE;
- boolean_t retval;
-
- PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0);
-
- p = wq->wq_proc;
-
- workqueue_lock_spin(wq);
-
- /*
- * There's two tricky issues here.
- *
- * First issue: we start the thread_call's that invoke this routine without
- * the workqueue lock held. The scheduler callback needs to trigger
- * reevaluation of the number of running threads but shouldn't take that
- * lock, so we can't use it to synchronize state around the thread_call.
- * As a result, it might re-enter the thread_call while this routine is
- * already running. This could cause it to fire a second time and we'll
- * have two add_timers running at once. Obviously, we don't want that to
- * keep stacking, so we need to keep it at two timers.
- *
- * Solution: use wq_flags (accessed via atomic CAS) to synchronize the
- * enqueue of the thread_call itself. When a thread needs to trigger the
- * add_timer, it checks for ATIMER_DELAYED_RUNNING and, when not set, sets
- * the flag then does a thread_call_enter. We'll then remove that flag
- * only once we've got the lock and it's safe for the thread_call to be
- * entered again.
- *
- * Second issue: we need to make sure that the two timers don't execute this
- * routine concurrently. We can't use the workqueue lock for this because
- * we'll need to drop it during our execution.
- *
- * Solution: use WQL_ATIMER_BUSY as a condition variable to indicate that
- * we are currently executing the routine and the next thread should wait.
- *
- * After all that, we arrive at the following four possible states:
- * !WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY no pending timer, no active timer
- * !WQ_ATIMER_DELAYED_RUNNING && WQL_ATIMER_BUSY no pending timer, 1 active timer
- * WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY 1 pending timer, no active timer
- * WQ_ATIMER_DELAYED_RUNNING && WQL_ATIMER_BUSY 1 pending timer, 1 active timer
- *
- * Further complication sometimes we need to trigger this function to run
- * without delay. Because we aren't under a lock between setting
- * WQ_ATIMER_DELAYED_RUNNING and calling thread_call_enter, we can't simply
- * re-enter the thread call: if thread_call_enter() returned false, we
- * wouldn't be able to distinguish the case where the thread_call had
- * already fired from the case where it hadn't been entered yet from the
- * other thread. So, we use a separate thread_call for immediate
- * invocations, and a separate RUNNING flag, WQ_ATIMER_IMMEDIATE_RUNNING.
- */
-
- while (wq->wq_lflags & WQL_ATIMER_BUSY) {
- wq->wq_lflags |= WQL_ATIMER_WAITING;
-
- assert_wait((caddr_t)wq, (THREAD_UNINT));
- workqueue_unlock(wq);
-
- thread_block(THREAD_CONTINUE_NULL);
-
- workqueue_lock_spin(wq);
- }
- wq->wq_lflags |= WQL_ATIMER_BUSY;
-
- /*
- * Decide which timer we are and remove the RUNNING flag.
- */
- if (thread_call_self == wq->wq_atimer_delayed_call) {
- if ((wq->wq_flags & WQ_ATIMER_DELAYED_RUNNING) == 0) {
- panic("workqueue_add_timer is the delayed timer but the delayed running flag isn't set");
- }
- WQ_UNSETFLAG(wq, WQ_ATIMER_DELAYED_RUNNING);
- } else if (thread_call_self == wq->wq_atimer_immediate_call) {
- if ((wq->wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) == 0) {
- panic("workqueue_add_timer is the immediate timer but the immediate running flag isn't set");
- }
- WQ_UNSETFLAG(wq, WQ_ATIMER_IMMEDIATE_RUNNING);
- } else {
- panic("workqueue_add_timer can't figure out which timer it is");
- }
-
-again:
- retval = TRUE;
- if ( !(wq->wq_flags & WQ_EXITING)) {
- boolean_t add_thread = FALSE;
- /*
- * check to see if the stall frequency was beyond our tolerance
- * or we have work on the queue, but haven't scheduled any
- * new work within our acceptable time interval because
- * there were no idle threads left to schedule
- */
- if (wq->wq_reqcount) {
- uint32_t priclass = 0;
- uint32_t thactive_count = 0;
- uint64_t curtime = mach_absolute_time();
- uint64_t busycount = 0;
-
- if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] &&
- wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){
- priclass = WORKQUEUE_EVENT_MANAGER_BUCKET;
- } else {
- for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) {
- if (wq->wq_requests[priclass])
- break;
- }
- }
-
- if (priclass < WORKQUEUE_EVENT_MANAGER_BUCKET){
- /*
- * Compute a metric for how many threads are active. We
- * find the highest priority request outstanding and then add up
- * the number of active threads in that and all higher-priority
- * buckets. We'll also add any "busy" threads which are not
- * active but blocked recently enough that we can't be sure
- * they've gone idle yet. We'll then compare this metric to our
- * max concurrency to decide whether to add a new thread.
- */
- for (uint32_t i = 0; i <= priclass; i++) {
- thactive_count += wq->wq_thactive_count[i];
-
- if (wq->wq_thscheduled_count[i] < wq->wq_thactive_count[i]) {
- if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i]))
- busycount++;
- }
- }
- }
-
- if (thactive_count + busycount < wq->wq_max_concurrency ||
- priclass == WORKQUEUE_EVENT_MANAGER_BUCKET) {
-
- if (wq->wq_thidlecount == 0) {
- /*
- * if we have no idle threads, try to add one
- */
- retval = workqueue_addnewthread(wq, priclass == WORKQUEUE_EVENT_MANAGER_BUCKET);
- }
- add_thread = TRUE;
- }
-
- if (wq->wq_reqcount) {
- /*
- * as long as we have threads to schedule, and we successfully
- * scheduled new work, keep trying
- */
- while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) {
- /*
- * workqueue_run_nextreq is responsible for
- * dropping the workqueue lock in all cases
- */
- retval = (workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_ADD_TIMER, 0, false) != THREAD_NULL);
- workqueue_lock_spin(wq);
-
- if (retval == FALSE)
- break;
- }
- if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_reqcount) {
-
- if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE)
- goto again;
-
- if (wq->wq_thidlecount == 0 || busycount) {
- start_timer = WQ_TIMER_DELAYED_NEEDED(wq);
- }
-
- PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_NONE, wq, wq->wq_reqcount, wq->wq_thidlecount, busycount, 0);
- }
- }
- }
- }
-
- /*
- * If we called WQ_TIMER_DELAYED_NEEDED above, then this flag will be set if that
- * call marked the timer running. If so, we let the timer interval grow.
- * Otherwise, we reset it back to 0.
- */
- if (!(wq->wq_flags & WQ_ATIMER_DELAYED_RUNNING)) {
- wq->wq_timer_interval = 0;
- }
-
- wq->wq_lflags &= ~WQL_ATIMER_BUSY;
-
- if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
- /*
- * wakeup the thread hung up in _workqueue_mark_exiting or workqueue_add_timer waiting for this timer
- * to finish getting out of the way
- */
- wq->wq_lflags &= ~WQL_ATIMER_WAITING;
- wakeup(wq);
- }
-
- PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0);
-
- workqueue_unlock(wq);
-
- if (start_timer == TRUE)
- workqueue_interval_timer_start(wq);
-}
-
-#pragma mark thread state tracking
-
-// called by spinlock code when trying to yield to lock owner
-void
-_workqueue_thread_yielded(void)
-{
- struct workqueue *wq;
- proc_t p;
-
- p = current_proc();
-
- if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL || wq->wq_reqcount == 0)
- return;
-
- workqueue_lock_spin(wq);
-
- if (wq->wq_reqcount) {
- uint64_t curtime;
- uint64_t elapsed;
- clock_sec_t secs;
- clock_usec_t usecs;
-
- if (wq->wq_thread_yielded_count++ == 0)
- wq->wq_thread_yielded_timestamp = mach_absolute_time();
-
- if (wq->wq_thread_yielded_count < wq_yielded_threshold) {
- workqueue_unlock(wq);
- return;
- }
-
- PTHREAD_TRACE_WQ(TRACE_wq_thread_yielded | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 0, 0);
-
- wq->wq_thread_yielded_count = 0;
-
- curtime = mach_absolute_time();
- elapsed = curtime - wq->wq_thread_yielded_timestamp;
- pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);
-
- if (secs == 0 && usecs < wq_yielded_window_usecs) {
-
- if (wq->wq_thidlecount == 0) {
- workqueue_addnewthread(wq, TRUE);
- /*
- * 'workqueue_addnewthread' drops the workqueue lock
- * when creating the new thread and then retakes it before
- * returning... this window allows other threads to process
- * requests, so we need to recheck for available work
- * if none found, we just return... the newly created thread
- * will eventually get used (if it hasn't already)...
- */
- if (wq->wq_reqcount == 0) {
- workqueue_unlock(wq);
- return;
- }
- }
- if (wq->wq_thidlecount) {
- (void)workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_UNCONSTRAINED, 0, false);
- /*
- * workqueue_run_nextreq is responsible for
- * dropping the workqueue lock in all cases
- */
- PTHREAD_TRACE_WQ(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 1, 0);
-
- return;
- }
- }
- PTHREAD_TRACE_WQ(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 2, 0);
- }
- workqueue_unlock(wq);
-}
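-
-/*
- * Summary of the heuristic above (descriptive only): the yield path acts only
- * once wq_yielded_threshold yields have accumulated, and only if they all
- * landed within wq_yielded_window_usecs; in that case the pool is considered
- * starved, a thread is added if none are idle, and the next request is run
- * unconstrained.
- */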
-
-static void
-workqueue_callback(int type, thread_t thread)
-{
- struct uthread *uth;
- struct threadlist *tl;
- struct workqueue *wq;
-
- uth = pthread_kern->get_bsdthread_info(thread);
- tl = pthread_kern->uthread_get_threadlist(uth);
- wq = tl->th_workq;
-
- switch (type) {
- case SCHED_CALL_BLOCK: {
- uint32_t old_activecount;
- boolean_t start_timer = FALSE;
-
- old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);
-
- /*
- * If we blocked and were at the requested concurrency previously, we may
- * need to spin up a new thread. Of course, if it's the event manager
- * then that's moot, so ignore that case.
- */
- if (old_activecount == wq->wq_reqconc[tl->th_priority] &&
- tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET) {
- uint64_t curtime;
- UInt64 *lastblocked_ptr;
-
- /*
- * the number of active threads at this priority
- * has fallen below the maximum number of concurrent
- * threads that we're allowed to run
- */
- lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority];
- curtime = mach_absolute_time();
-
- /*
- * if we collide with another thread trying to update the last_blocked (really unlikely
- * since another thread would have to get scheduled and then block after we start down
- * this path), it's not a problem. Either timestamp is adequate, so no need to retry
- */
-
- OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);
-
- if (wq->wq_reqcount) {
- /*
- * We have work to do so start up the timer if it's not
- * running; it'll sort out whether we need to start another
- * thread
- */
- start_timer = WQ_TIMER_DELAYED_NEEDED(wq);
- }
-
- if (start_timer == TRUE) {
- workqueue_interval_timer_start(wq);
- }
- }
- PTHREAD_TRACE1_WQ(TRACE_wq_thread_block | DBG_FUNC_START, wq, old_activecount, tl->th_priority, start_timer, thread_tid(thread));
- break;
- }
- case SCHED_CALL_UNBLOCK:
- /*
- * we cannot take the workqueue_lock here...
- * an UNBLOCK can occur from a timer event which
- * is run from an interrupt context... if the workqueue_lock
- * is already held by this processor, we'll deadlock...
- * the thread lock for the thread being UNBLOCKED
- * is also held
- */
- OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority]);
-
- PTHREAD_TRACE1_WQ(TRACE_wq_thread_block | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, 0, thread_tid(thread));
-
- break;
- }
-}
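-
-/*
- * Net effect of the callback above (descriptive only): SCHED_CALL_BLOCK
- * decrements wq_thactive_count[bucket] and, when the bucket just dropped
- * below its requested concurrency, records a last-blocked timestamp;
- * SCHED_CALL_UNBLOCK increments the count again. The add timer and
- * may_start_constrained_thread() later use that timestamp to treat a
- * recently-blocked thread as still "busy".
- */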
-
-sched_call_t
-_workqueue_get_sched_callback(void)
-{
- return workqueue_callback;
-}
-
-#pragma mark thread addition/removal
-
-static mach_vm_size_t
-_workqueue_allocsize(struct workqueue *wq)
-{
- proc_t p = wq->wq_proc;
- mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
- mach_vm_size_t pthread_size =
- vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map));
- return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
-}
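-
-/*
- * Rough layout of the allocation sized above (see the fuller stack diagram
- * near the thread-setup code at the end of this file):
- *
- *     th_stackaddr                    guard page (guardsize, VM_PROT_NONE)
- *     th_stackaddr + guardsize        stack bottom; the stack grows down toward the guard
- *     + PTH_DEFAULT_STACKSIZE         top of stack / start of the pthread_t area
- *     + pthread_size                  end of the allocation
- */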
-
-/**
- * pop goes the thread
- *
- * If fromexit is set, the call is from workqueue_exit(),
- * so some cleanups are to be avoided.
- */
-static void
-workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use)
-{
- struct uthread * uth;
- struct workqueue * wq = tl->th_workq;
-
- if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){
- TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
- } else {
- TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
- }
-
- if (fromexit == 0) {
- assert(wq->wq_nthreads && wq->wq_thidlecount);
- wq->wq_nthreads--;
- wq->wq_thidlecount--;
- }
-
- /*
- * Clear the threadlist pointer in the uthread so that
- * a blocked thread, woken up for termination, will
- * not access the thread list as it is going to be
- * freed.
- */
- pthread_kern->thread_sched_call(tl->th_thread, NULL);
-
- uth = pthread_kern->get_bsdthread_info(tl->th_thread);
- if (uth != (struct uthread *)0) {
- pthread_kern->uthread_set_threadlist(uth, NULL);
- }
- if (fromexit == 0) {
- /* during exit the lock is not held */
- workqueue_unlock(wq);
- }
-
- if ( (tl->th_flags & TH_LIST_NEW) || first_use ) {
- /*
- * thread was created, but never used...
- * need to clean up the stack and port ourselves
- * since we're not going to spin up through the
- * normal exit path triggered from Libc
- */
- if (fromexit == 0) {
- /* vm map is already deallocated when this is called from exit */
- (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, _workqueue_allocsize(wq));
- }
- (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport);
-
- } else {
-
- PTHREAD_TRACE1_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
- }
- /*
- * drop our ref on the thread
- */
- thread_deallocate(tl->th_thread);
-
- kfree(tl, sizeof(struct threadlist));
-}
-
-
-/**
- * Try to add a new workqueue thread.
- *
- * - called with workq lock held
- * - dropped and retaken around thread creation
- * - return with workq lock held
- */
-static boolean_t
-workqueue_addnewthread(struct workqueue *wq, boolean_t ignore_constrained_thread_limit)
-{
- struct threadlist *tl;
- struct uthread *uth;
- kern_return_t kret;
- thread_t th;
- proc_t p;
- void *sright;
- mach_vm_offset_t stackaddr;
-
- if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING) {
- PTHREAD_TRACE_WQ(TRACE_wq_thread_add_during_exit | DBG_FUNC_NONE, wq, 0, 0, 0, 0);
- return (FALSE);
- }
-
- if (wq->wq_nthreads >= wq_max_threads) {
- PTHREAD_TRACE_WQ(TRACE_wq_thread_limit_exceeded | DBG_FUNC_NONE, wq, wq->wq_nthreads, wq_max_threads, 0, 0);
- return (FALSE);
- }
-
- if (ignore_constrained_thread_limit == FALSE &&
- wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
- /*
- * If we're not creating this thread to service an overcommit or
- * event manager request, then we check to see if we are over our
- * constrained thread limit, in which case we error out.
- */
- PTHREAD_TRACE_WQ(TRACE_wq_thread_constrained_maxed | DBG_FUNC_NONE, wq, wq->wq_constrained_threads_scheduled,
- wq_max_constrained_threads, 0, 0);
- return (FALSE);
- }
-
- wq->wq_nthreads++;
-
- p = wq->wq_proc;
- workqueue_unlock(wq);
-
- tl = kalloc(sizeof(struct threadlist));
- bzero(tl, sizeof(struct threadlist));
-
- kret = pthread_kern->thread_create_workq_waiting(wq->wq_task, wq_unpark_continue, tl, &th);
- if (kret != KERN_SUCCESS) {
- PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 0, 0, 0);
- kfree(tl, sizeof(struct threadlist));
- goto failed;
- }
-
- stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
-
- mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
- mach_vm_size_t pthread_size =
- vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map));
- mach_vm_size_t th_allocsize = guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
-
- kret = mach_vm_map(wq->wq_map, &stackaddr,
- th_allocsize, page_size-1,
- VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL,
- 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
- VM_INHERIT_DEFAULT);
-
- if (kret != KERN_SUCCESS) {
- PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 1, 0, 0);
-
- kret = mach_vm_allocate(wq->wq_map,
- &stackaddr, th_allocsize,
- VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
- }
- if (kret == KERN_SUCCESS) {
- /*
- * The guard page is at the lowest address
- * The stack base is the highest address
- */
- kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE);
-
- if (kret != KERN_SUCCESS) {
- (void) mach_vm_deallocate(wq->wq_map, stackaddr, th_allocsize);
- PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 2, 0, 0);
- }
- }
- if (kret != KERN_SUCCESS) {
- (void) thread_terminate(th);
- thread_deallocate(th);
-
- kfree(tl, sizeof(struct threadlist));
- goto failed;
- }
- thread_reference(th);
-
- pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD | THREAD_TAG_WORKQUEUE);
-
- sright = (void *)pthread_kern->convert_thread_to_port(th);
- tl->th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(wq->wq_task));
-
- pthread_kern->thread_static_param(th, TRUE);
-
- tl->th_flags = TH_LIST_INITED | TH_LIST_NEW;
-
- tl->th_thread = th;
- tl->th_workq = wq;
- tl->th_stackaddr = stackaddr;
- tl->th_priority = WORKQUEUE_NUM_BUCKETS;
-
- uth = pthread_kern->get_bsdthread_info(tl->th_thread);
-
- workqueue_lock_spin(wq);
-
- pthread_kern->uthread_set_threadlist(uth, tl);
- TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
-
- wq->wq_thidlecount++;
-
- PTHREAD_TRACE_WQ(TRACE_wq_thread_create | DBG_FUNC_NONE, wq, 0, 0, 0, 0);
-
- return (TRUE);
-
-failed:
- workqueue_lock_spin(wq);
- wq->wq_nthreads--;
-
- return (FALSE);
-}
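-
-/*
- * Typical call pattern (illustrative only, mirroring the callers above): the
- * workq lock is held on entry, may be dropped and retaken inside around
- * thread and stack creation, and is held again on return:
- *
- *     workqueue_lock_spin(wq);
- *     if (wq->wq_thidlecount == 0)
- *         (void)workqueue_addnewthread(wq, FALSE);
- *     ...dispatch or record the request...
- *     workqueue_unlock(wq);
- */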
-
-/**
- * Setup per-process state for the workqueue.
- */
-int
-_workq_open(struct proc *p, __unused int32_t *retval)
-{
- struct workqueue * wq;
- int wq_size;
- char * ptr;
- uint32_t i;
- uint32_t num_cpus;
- int error = 0;
-
- if (pthread_kern->proc_get_register(p) == 0) {
- return EINVAL;
- }
-
- num_cpus = pthread_kern->ml_get_max_cpus();
-
- if (wq_init_constrained_limit) {
- uint32_t limit;
- /*
- * set up the limit for the constrained pool;
- * this is a virtual pool in that we don't
- * maintain it on separate idle and run lists
- */
- limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;
-
- if (limit > wq_max_constrained_threads)
- wq_max_constrained_threads = limit;
-
- wq_init_constrained_limit = 0;
-
- if (wq_max_threads > pthread_kern->config_thread_max - 20) {
- wq_max_threads = pthread_kern->config_thread_max - 20;
- }
- }
-
- if (pthread_kern->proc_get_wqptr(p) == NULL) {
- if (pthread_kern->proc_init_wqptr_or_wait(p) == FALSE) {
- assert(pthread_kern->proc_get_wqptr(p) != NULL);
- goto out;
- }
-
- wq_size = sizeof(struct workqueue);
-
- ptr = (char *)kalloc(wq_size);
- bzero(ptr, wq_size);
-
- wq = (struct workqueue *)ptr;
- wq->wq_flags = WQ_LIST_INITED;
- wq->wq_proc = p;
- wq->wq_max_concurrency = wq_max_concurrency;
- wq->wq_task = current_task();
- wq->wq_map = pthread_kern->current_map();
-
- for (i = 0; i < WORKQUEUE_NUM_BUCKETS; i++)
- wq->wq_reqconc[i] = (uint16_t)wq->wq_max_concurrency;
-
- // The event manager bucket is special, so it gets a concurrency of 1,
- // though we shouldn't ever read this value for that bucket
- wq->wq_reqconc[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
-
- // Start the event manager at the priority hinted at by the policy engine
- int mgr_priority_hint = pthread_kern->task_get_default_manager_qos(current_task());
- wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(mgr_priority_hint) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
-
- TAILQ_INIT(&wq->wq_thrunlist);
- TAILQ_INIT(&wq->wq_thidlelist);
-
- wq->wq_atimer_delayed_call =
- thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer,
- (thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL);
- wq->wq_atimer_immediate_call =
- thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer,
- (thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL);
-
- lck_spin_init(&wq->wq_lock, pthread_lck_grp, pthread_lck_attr);
-
- pthread_kern->proc_set_wqptr(p, wq);
-
- }
-out:
-
- return(error);
-}
-
-/*
- * Routine: workqueue_mark_exiting
- *
- * Function: Mark the work queue such that new threads will not be added to the
- * work queue after we return.
- *
- * Conditions: Called against the current process.
- */
-void
-_workqueue_mark_exiting(struct proc *p)
-{
- struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
-
- if (wq != NULL) {
-
- PTHREAD_TRACE_WQ(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
-
- workqueue_lock_spin(wq);
-
- /*
- * We arm the add timer without holding the workqueue lock so we need
- * to synchronize with any running or soon to be running timers.
- *
- * Threads that intend to arm the timer atomically OR
- * WQ_ATIMER_{DELAYED,IMMEDIATE}_RUNNING into the wq_flags, only if
- * WQ_EXITING is not present. So, once we have set WQ_EXITING, we can
- * be sure that no new RUNNING flags will be set, but still need to
- * wait for the already running timers to complete.
- *
- * We always hold the workq lock when dropping WQ_ATIMER_RUNNING, so
- * the check for and sleep until clear is protected.
- */
- WQ_SETFLAG(wq, WQ_EXITING);
-
- if (wq->wq_flags & WQ_ATIMER_DELAYED_RUNNING) {
- if (thread_call_cancel(wq->wq_atimer_delayed_call) == TRUE) {
- WQ_UNSETFLAG(wq, WQ_ATIMER_DELAYED_RUNNING);
- }
- }
- if (wq->wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) {
- if (thread_call_cancel(wq->wq_atimer_immediate_call) == TRUE) {
- WQ_UNSETFLAG(wq, WQ_ATIMER_IMMEDIATE_RUNNING);
- }
- }
- while (wq->wq_flags & (WQ_ATIMER_DELAYED_RUNNING | WQ_ATIMER_IMMEDIATE_RUNNING) ||
- (wq->wq_lflags & WQL_ATIMER_BUSY)) {
- assert_wait((caddr_t)wq, (THREAD_UNINT));
- workqueue_unlock(wq);
-
- thread_block(THREAD_CONTINUE_NULL);
-
- workqueue_lock_spin(wq);
- }
- workqueue_unlock(wq);
-
- PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
- }
-}
-
-/*
- * Routine: workqueue_exit
- *
- * Function: clean up the work queue structure(s) now that there are no threads
- * left running inside the work queue (except possibly current_thread).
- *
- * Conditions: Called by the last thread in the process.
- * Called against current process.
- */
-void
-_workqueue_exit(struct proc *p)
-{
- struct workqueue * wq;
- struct threadlist * tl, *tlist;
- struct uthread *uth;
- size_t wq_size = sizeof(struct workqueue);
-
- wq = pthread_kern->proc_get_wqptr(p);
- if (wq != NULL) {
-
- PTHREAD_TRACE_WQ(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
-
- pthread_kern->proc_set_wqptr(p, NULL);
-
- /*
- * Clean up workqueue data structures for threads that exited and
- * didn't get a chance to clean up after themselves.
- */
- TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
- assert((tl->th_flags & TH_LIST_RUNNING) != 0);
-
- pthread_kern->thread_sched_call(tl->th_thread, NULL);
-
- uth = pthread_kern->get_bsdthread_info(tl->th_thread);
- if (uth != (struct uthread *)0) {
- pthread_kern->uthread_set_threadlist(uth, NULL);
- }
- TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
-
- /*
- * drop our last ref on the thread
- */
- thread_deallocate(tl->th_thread);
-
- kfree(tl, sizeof(struct threadlist));
- }
- TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
- assert((tl->th_flags & TH_LIST_RUNNING) == 0);
- assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
- workqueue_removethread(tl, true, false);
- }
- TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlemgrlist, th_entry, tlist) {
- assert((tl->th_flags & TH_LIST_RUNNING) == 0);
- assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
- workqueue_removethread(tl, true, false);
- }
- thread_call_free(wq->wq_atimer_delayed_call);
- thread_call_free(wq->wq_atimer_immediate_call);
- lck_spin_destroy(&wq->wq_lock, pthread_lck_grp);
-
- kfree(wq, wq_size);
-
- PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
- }
-}
-
-
-#pragma mark workqueue thread manipulation
-
-/**
- * Entry point for libdispatch to ask for threads
- */
-static int wqops_queue_reqthreads(struct proc *p, int reqcount, pthread_priority_t priority){
- struct workqueue *wq;
- boolean_t start_timer = FALSE;
-
- boolean_t overcommit = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0;
- int class = pthread_priority_get_class_index(priority);
-
- boolean_t event_manager = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) != 0;
- if (event_manager){
- class = WORKQUEUE_EVENT_MANAGER_BUCKET;
- }
-
- if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS) || (overcommit && event_manager)) {
- return EINVAL;
- }
-
-
- if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
- return EINVAL;
- }
-
- workqueue_lock_spin(wq);
-
- if (overcommit == 0 && event_manager == 0) {
- wq->wq_reqcount += reqcount;
- wq->wq_requests[class] += reqcount;
-
- PTHREAD_TRACE_WQ(TRACE_wq_req_threads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);
-
- while (wq->wq_reqcount) {
- if (!workqueue_run_one(p, wq, overcommit, 0))
- break;
- }
- } else if (overcommit) {
- PTHREAD_TRACE_WQ(TRACE_wq_req_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_ocrequests[class], reqcount, 0);
-
- while (reqcount) {
- if (!workqueue_run_one(p, wq, overcommit, priority))
- break;
- reqcount--;
- }
- if (reqcount) {
- /*
- * We need to delay starting some of the overcommit requests.
- * We'll record the request here and as existing threads return to
- * the kernel, we'll notice the ocrequests and spin them back to
- * user space as the overcommit variety.
- */
- wq->wq_reqcount += reqcount;
- wq->wq_requests[class] += reqcount;
- wq->wq_ocrequests[class] += reqcount;
-
- PTHREAD_TRACE_WQ(TRACE_wq_delay_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_ocrequests[class], reqcount, 0);
-
- /*
- * If we delayed this thread coming up but we're not constrained
- * or at max threads then we need to start the timer so we don't
- * risk dropping this request on the floor.
- */
- if ((wq->wq_constrained_threads_scheduled < wq_max_constrained_threads) &&
- (wq->wq_nthreads < wq_max_threads)){
- start_timer = WQ_TIMER_DELAYED_NEEDED(wq);
- }
- }
- } else if (event_manager) {
- PTHREAD_TRACE_WQ(TRACE_wq_req_event_manager | DBG_FUNC_NONE, wq, wq->wq_event_manager_priority, wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET], wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET], 0);
-
- if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){
- wq->wq_reqcount += 1;
- wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
- }
-
- // We've recorded the request for an event manager thread above. We'll
- // let the timer pick it up as we would for a kernel callout. We can
- // do a direct add/wakeup when that support is added for the kevent path.
- if (wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){
- start_timer = WQ_TIMER_DELAYED_NEEDED(wq);
- }
- }
-
- if (start_timer) {
- workqueue_interval_timer_start(wq);
- }
-
- workqueue_unlock(wq);
-
- return 0;
-}
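-
-/*
- * Example request shapes accepted above (descriptive only; the priority words
- * themselves are composed in userspace):
- *   - constrained:    QoS class, no flags                   -> counted per bucket
- *   - overcommit:     _PTHREAD_PRIORITY_OVERCOMMIT_FLAG     -> run now or recorded as ocrequest
- *   - event manager:  _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG  -> single manager request
- * A request carrying both the overcommit and event manager flags is rejected
- * with EINVAL, as are a non-positive reqcount and an out-of-range class.
- */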
-
-/*
- * Used by the kevent system to request threads.
- *
- * Currently count is ignored and we always return one thread per invocation.
- */
-thread_t _workq_reqthreads(struct proc *p, int requests_count, workq_reqthreads_req_t requests){
- thread_t th = THREAD_NULL;
- boolean_t do_thread_call = FALSE;
- boolean_t emergency_thread = FALSE;
- assert(requests_count > 0);
-
-#if DEBUG
- // Make sure that the requests array is sorted, highest priority first
- if (requests_count > 1){
- __assert_only qos_class_t priority = _pthread_priority_get_qos_newest(requests[0].priority);
- __assert_only unsigned long flags = ((_pthread_priority_get_flags(requests[0].priority) & (_PTHREAD_PRIORITY_OVERCOMMIT_FLAG|_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)) != 0);
- for (int i = 1; i < requests_count; i++){
- if (requests[i].count == 0) continue;
- __assert_only qos_class_t next_priority = _pthread_priority_get_qos_newest(requests[i].priority);
- __assert_only unsigned long next_flags = ((_pthread_priority_get_flags(requests[i].priority) & (_PTHREAD_PRIORITY_OVERCOMMIT_FLAG|_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)) != 0);
- if (next_flags != flags){
- flags = next_flags;
- priority = next_priority;
- } else {
- assert(next_priority <= priority);
- }
- }
- }
-#endif // DEBUG
-
- struct workqueue *wq;
- if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
- return THREAD_NULL;
- }
-
- workqueue_lock_spin(wq);
-
- PTHREAD_TRACE_WQ(TRACE_wq_kevent_req_threads | DBG_FUNC_START, wq, requests_count, 0, 0, 0);
-
- // Look for overcommit or event-manager-only requests.
- boolean_t have_overcommit = FALSE;
- pthread_priority_t priority = 0;
- for (int i = 0; i < requests_count; i++){
- if (requests[i].count == 0)
- continue;
- priority = requests[i].priority;
- if (_pthread_priority_get_qos_newest(priority) == QOS_CLASS_UNSPECIFIED){
- priority |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
- }
- if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) != 0){
- goto event_manager;
- }
- if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){
- have_overcommit = TRUE;
- break;
- }
- }
-
- if (have_overcommit){
- if (wq->wq_thidlecount){
- th = workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_OVERCOMMIT_KEVENT, priority, true);
- if (th != THREAD_NULL){
- goto out;
- } else {
- workqueue_lock_spin(wq); // reacquire lock
- }
- }
-
- int class = pthread_priority_get_class_index(priority);
- wq->wq_reqcount += 1;
- wq->wq_requests[class] += 1;
- wq->wq_kevent_ocrequests[class] += 1;
-
- do_thread_call = WQ_TIMER_IMMEDIATE_NEEDED(wq);
- goto deferred;
- }
-
- // Having no overcommit requests, try to find any request that can start
- // There's no TOCTTOU since we hold the workqueue lock
- for (int i = 0; i < requests_count; i++){
- workq_reqthreads_req_t req = requests + i;
- priority = req->priority;
- int class = pthread_priority_get_class_index(priority);
-
- if (req->count == 0)
- continue;
-
- if (!may_start_constrained_thread(wq, class, WORKQUEUE_NUM_BUCKETS, NULL))
- continue;
-
- wq->wq_reqcount += 1;
- wq->wq_requests[class] += 1;
- wq->wq_kevent_requests[class] += 1;
-
- PTHREAD_TRACE_WQ(TRACE_wq_req_kevent_threads | DBG_FUNC_NONE, wq, priority, wq->wq_kevent_requests[class], 1, 0);
-
- if (wq->wq_thidlecount){
- th = workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_DEFAULT_KEVENT, priority, true);
- goto out;
- } else {
- do_thread_call = WQ_TIMER_IMMEDIATE_NEEDED(wq);
- goto deferred;
- }
- }
-
- // Okay, here's the fun case: we can't spin up any of the non-overcommit threads
- // that we've seen a request for, so we kick this over to the event manager thread
- emergency_thread = TRUE;
-
-event_manager:
- if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){
- wq->wq_reqcount += 1;
- wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
- PTHREAD_TRACE_WQ(TRACE_wq_req_event_manager | DBG_FUNC_NONE, wq, 0, wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET], 1, 0);
- } else {
- PTHREAD_TRACE_WQ(TRACE_wq_req_event_manager | DBG_FUNC_NONE, wq, 0, wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET], 0, 0);
- }
- wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
-
- if (wq->wq_thidlecount && wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){
- th = workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_EVENT_MANAGER, 0, true);
- assert(th != THREAD_NULL);
- goto out;
- }
- do_thread_call = WQ_TIMER_IMMEDIATE_NEEDED(wq);
-
-deferred:
- workqueue_unlock(wq);
-
- if (do_thread_call == TRUE){
- workqueue_interval_timer_trigger(wq);
- }
-
-out:
- PTHREAD_TRACE_WQ(TRACE_wq_kevent_req_threads | DBG_FUNC_END, wq, do_thread_call, 0, 0, 0);
-
- return emergency_thread ? (void*)-1 : th;
-}
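-
-/*
- * Return value summary for _workq_reqthreads (descriptive only): a thread_t
- * when an idle thread could be bound and run directly, THREAD_NULL when the
- * request was recorded and deferred to the immediate timer, or (void *)-1
- * ("emergency thread") when no constrained thread could be started and the
- * request was punted to the event manager.
- */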
-
-
-static int wqops_thread_return(struct proc *p){
- thread_t th = current_thread();
- struct uthread *uth = pthread_kern->get_bsdthread_info(th);
- struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
-
- /* reset signal mask on the workqueue thread to default state */
- if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) {
- pthread_kern->proc_lock(p);
- pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask);
- pthread_kern->proc_unlock(p);
- }
-
- struct workqueue *wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
- if (wq == NULL || !tl) {
- return EINVAL;
- }
-
- PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_START, tl->th_workq, 0, 0, 0, 0);
-
- /*
- * This squash call has neat semantics: it removes the specified overrides,
- * replacing the current requested QoS with the previous effective QoS from
- * those overrides. This means we won't be preempted due to having our QoS
- * lowered. Of course, now our understanding of the thread's QoS is wrong,
- * so we'll adjust below.
- */
- int new_qos =
- pthread_kern->proc_usynch_thread_qos_squash_override_for_resource(th,
- THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD,
- THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
-
- workqueue_lock_spin(wq);
-
- if (tl->th_flags & TH_LIST_KEVENT_BOUND) {
- unsigned int flags = KEVENT_FLAG_WORKQ;
- if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
- flags |= KEVENT_FLAG_WORKQ_MANAGER;
- }
-
- workqueue_unlock(wq);
- kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, flags);
- workqueue_lock_spin(wq);
-
- tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
- }
-
- /* Fix up counters from the squash operation. */
- uint8_t old_bucket = tl->th_priority;
- uint8_t new_bucket = thread_qos_get_class_index(new_qos);
-
- if (old_bucket != new_bucket) {
- OSAddAtomic(-1, &wq->wq_thactive_count[old_bucket]);
- OSAddAtomic(1, &wq->wq_thactive_count[new_bucket]);
-
- wq->wq_thscheduled_count[old_bucket]--;
- wq->wq_thscheduled_count[new_bucket]++;
-
- tl->th_priority = new_bucket;
- }
-
- PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_END, tl->th_workq, new_qos, 0, 0, 0);
-
- PTHREAD_TRACE_WQ(TRACE_wq_runitem | DBG_FUNC_END, wq, 0, 0, 0, 0);
-
- (void)workqueue_run_nextreq(p, wq, th, RUN_NEXTREQ_DEFAULT, 0, false);
- /*
- * workqueue_run_nextreq is responsible for
- * dropping the workqueue lock in all cases
- */
- return 0;
-}
-
-/**
- * Multiplexed call to interact with the workqueue mechanism
- */
-int
-_workq_kernreturn(struct proc *p,
- int options,
- user_addr_t item,
- int arg2,
- int arg3,
- int32_t *retval)
-{
- int error = 0;
-
- if (pthread_kern->proc_get_register(p) == 0) {
- return EINVAL;
- }
-
- switch (options) {
- case WQOPS_QUEUE_NEWSPISUPP: {
- /*
- * arg2 = offset of serialno into dispatch queue
- * arg3 = kevent support
- */
- int offset = arg2;
- if (arg3 & 0x01){
- // If we get here, then userspace has indicated support for kevent delivery.
- }
-
- pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset);
- break;
- }
- case WQOPS_QUEUE_REQTHREADS: {
- /*
- * arg2 = number of threads to start
- * arg3 = priority
- */
- error = wqops_queue_reqthreads(p, arg2, arg3);
- break;
- }
- case WQOPS_SET_EVENT_MANAGER_PRIORITY: {
- /*
- * arg2 = priority for the manager thread
- *
- * if _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG is set, the
- * ~_PTHREAD_PRIORITY_FLAGS_MASK contains a scheduling priority instead
- * of a QOS value
- */
- pthread_priority_t pri = arg2;
-
- struct workqueue *wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
- if (wq == NULL) {
- error = EINVAL;
- break;
- }
- workqueue_lock_spin(wq);
- if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){
- // If userspace passes a scheduling priority, that takes precedence
- // over any QoS. (So, userspace should take care not to accidentally
- // lower the priority this way.)
- uint32_t sched_pri = pri & (~_PTHREAD_PRIORITY_FLAGS_MASK);
- if (wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){
- wq->wq_event_manager_priority = MAX(sched_pri, wq->wq_event_manager_priority & (~_PTHREAD_PRIORITY_FLAGS_MASK))
- | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
- } else {
- wq->wq_event_manager_priority = sched_pri
- | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
- }
- } else if ((wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
- int cur_qos = pthread_priority_get_thread_qos(wq->wq_event_manager_priority);
- int new_qos = pthread_priority_get_thread_qos(pri);
- wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(MAX(cur_qos, new_qos)) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
- }
- workqueue_unlock(wq);
- break;
- }
- case WQOPS_THREAD_KEVENT_RETURN:
- if (item != 0) {
- int32_t kevent_retval;
- int ret = kevent_qos_internal(p, -1, item, arg2, item, arg2, NULL, NULL, KEVENT_FLAG_WORKQ | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS, &kevent_retval);
- // We shouldn't be getting more errors out than events we put in, so
- // reusing the input buffer should always provide enough space. But,
- // the assert is commented out since we get errors in edge cases in the
- // process lifecycle.
- //assert(ret == KERN_SUCCESS && kevent_retval >= 0);
- if (ret != KERN_SUCCESS){
- error = ret;
- break;
- } else if (kevent_retval > 0){
- assert(kevent_retval <= arg2);
- *retval = kevent_retval;
- error = 0;
- break;
- }
- }
- // FALLTHRU
- case WQOPS_THREAD_RETURN:
- error = wqops_thread_return(p);
- // NOT REACHED except in case of error
- assert(error);
- break;
- default:
- error = EINVAL;
- break;
- }
- return (error);
-}
-
-
-static boolean_t
-workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority)
-{
- boolean_t ran_one;
-
- if (wq->wq_thidlecount == 0) {
- if (overcommit == FALSE) {
- if (wq->wq_constrained_threads_scheduled < wq->wq_max_concurrency)
- workqueue_addnewthread(wq, overcommit);
- } else {
- workqueue_addnewthread(wq, overcommit);
-
- if (wq->wq_thidlecount == 0)
- return (FALSE);
- }
- }
- ran_one = (workqueue_run_nextreq(p, wq, THREAD_NULL, overcommit ? RUN_NEXTREQ_OVERCOMMIT : RUN_NEXTREQ_DEFAULT, priority, false) != THREAD_NULL);
- /*
- * workqueue_run_nextreq is responsible for
- * dropping the workqueue lock in all cases
- */
- workqueue_lock_spin(wq);
-
- return (ran_one);
-}
-
-/*
- * We have no work to do, so park ourselves on the idle list.
- *
- * Consumes the workqueue lock and does not return.
- */
-static void __dead2
-parkit(struct workqueue *wq, struct threadlist *tl, thread_t thread)
-{
- assert(thread == tl->th_thread);
- assert(thread == current_thread());
-
- uint32_t us_to_wait = 0;
-
- TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
-
- tl->th_flags &= ~TH_LIST_RUNNING;
- tl->th_flags &= ~TH_LIST_KEVENT;
- assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
-
- if (tl->th_flags & TH_LIST_CONSTRAINED) {
- wq->wq_constrained_threads_scheduled--;
- tl->th_flags &= ~TH_LIST_CONSTRAINED;
- }
-
- OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);
- wq->wq_thscheduled_count[tl->th_priority]--;
- wq->wq_threads_scheduled--;
- uint32_t thidlecount = ++wq->wq_thidlecount;
-
- pthread_kern->thread_sched_call(thread, NULL);
-
- /*
- * We'd like to always have one manager thread parked so that we can have
- * low latency when we need to bring a manager thread up. If that idle
- * thread list is empty, make this thread a manager thread.
- *
- * XXX: This doesn't check that there's not a manager thread outstanding,
- * so it's based on the assumption that most manager callouts will change
- * their QoS before parking. If that stops being true, this may end up
- * costing us more than we gain.
- */
- if (TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
- tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET){
- reset_priority(tl, pthread_priority_from_wq_class_index(wq, WORKQUEUE_EVENT_MANAGER_BUCKET));
- tl->th_priority = WORKQUEUE_EVENT_MANAGER_BUCKET;
- }
-
- if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){
- TAILQ_INSERT_HEAD(&wq->wq_thidlemgrlist, tl, th_entry);
- } else {
- TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);
- }
-
- PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_START, wq,
- wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, 0);
-
- /*
- * When we remove the voucher from the thread, we may lose our importance
- * causing us to get preempted, so we do this after putting the thread on
- * the idle list. That way, when we get our importance back, we'll be able
- * to use this thread from e.g. the kevent call out to deliver a boosting
- * message.
- */
- workqueue_unlock(wq);
- kern_return_t kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
- assert(kr == KERN_SUCCESS);
- workqueue_lock_spin(wq);
-
- if ((tl->th_flags & TH_LIST_RUNNING) == 0) {
- if (thidlecount < 101) {
- us_to_wait = wq_reduce_pool_window_usecs - ((thidlecount-2) * (wq_reduce_pool_window_usecs / 100));
- } else {
- us_to_wait = wq_reduce_pool_window_usecs / 100;
- }
-
- thread_set_pending_block_hint(thread, kThreadWaitParkedWorkQueue);
- assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE),
- TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait,
- wq_reduce_pool_window_usecs/10, NSEC_PER_USEC);
-
- workqueue_unlock(wq);
-
- thread_block(wq_unpark_continue);
- panic("thread_block(wq_unpark_continue) returned!");
- } else {
- workqueue_unlock(wq);
-
- /*
- * While we'd dropped the lock to unset our voucher, someone came
- * around and made us runnable. But because we weren't waiting on the
- * event their wakeup() was ineffectual. To correct for that, we just
- * run the continuation ourselves.
- */
- wq_unpark_continue(NULL, THREAD_AWAKENED);
- }
-}
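-
-/*
- * Illustrative park-timeout arithmetic from parkit() above (the numbers
- * assume, purely for example, wq_reduce_pool_window_usecs == 5,000,000):
- *
- *     2 idle threads:     5,000,000 - (0  * 50,000) = 5.0s until self-destruct
- *     50 idle threads:    5,000,000 - (48 * 50,000) = 2.6s
- *     101+ idle threads:  50,000 (1/100th of the window)
- *
- * The more idle threads there are, the sooner each one times out, so an
- * oversized pool drains itself faster.
- */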
-
-static boolean_t may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass, uint32_t my_priclass, boolean_t *start_timer){
- if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
- /*
- * we need 1 or more constrained threads to return to the kernel before
- * we can dispatch additional work
- */
- return FALSE;
- }
-
- uint32_t busycount = 0;
- uint32_t thactive_count = wq->wq_thactive_count[at_priclass];
-
- // Has our most recently blocked thread blocked recently enough that we
- // should still consider it busy?
- if (wq->wq_thscheduled_count[at_priclass] > wq->wq_thactive_count[at_priclass]) {
- if (wq_thread_is_busy(mach_absolute_time(), &wq->wq_lastblocked_ts[at_priclass])) {
- busycount++;
- }
- }
-
- if (my_priclass < WORKQUEUE_NUM_BUCKETS && my_priclass == at_priclass){
- /*
- * don't count this thread as currently active
- */
- thactive_count--;
- }
-
- if (thactive_count + busycount >= wq->wq_max_concurrency) {
- if (busycount && start_timer) {
- /*
- * we found at least 1 thread in the
- * 'busy' state... make sure we start
- * the timer because if they are the only
- * threads keeping us from scheduling
- * this work request, we won't get a callback
- * to kick off the timer... we need to
- * start it now...
- */
- *start_timer = WQ_TIMER_DELAYED_NEEDED(wq);
- }
-
- PTHREAD_TRACE_WQ(TRACE_wq_overcommitted|DBG_FUNC_NONE, wq, ((start_timer && *start_timer) ? 1 << _PTHREAD_PRIORITY_FLAGS_SHIFT : 0) | class_index_get_pthread_priority(at_priclass), thactive_count, busycount, 0);
-
- return FALSE;
- }
- return TRUE;
-}
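-
-/*
- * Worked example of the admission check above (illustrative numbers only):
- * with wq_max_concurrency == 4, three active threads in the bucket and one
- * thread that blocked recently enough to still count as busy, thactive_count
- * + busycount == 4 >= 4, so the constrained request is refused; because
- * busycount is non-zero, the delayed timer is armed to retry once the busy
- * thread has settled.
- */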
-
-static struct threadlist *
-pop_from_thidlelist(struct workqueue *wq, uint32_t priclass)
-{
- assert(wq->wq_thidlecount);
-
- struct threadlist *tl = NULL;
-
- if (!TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
- (priclass == WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlelist))){
- tl = TAILQ_FIRST(&wq->wq_thidlemgrlist);
- TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
- assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
- } else if (!TAILQ_EMPTY(&wq->wq_thidlelist) &&
- (priclass != WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlemgrlist))){
- tl = TAILQ_FIRST(&wq->wq_thidlelist);
- TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
- assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
- } else {
- panic("pop_from_thidlelist called with no threads available");
- }
- assert((tl->th_flags & TH_LIST_RUNNING) == 0);
-
- assert(wq->wq_thidlecount);
- wq->wq_thidlecount--;
-
- TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);
-
- tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;
-
- wq->wq_threads_scheduled++;
- wq->wq_thscheduled_count[priclass]++;
- OSAddAtomic(1, &wq->wq_thactive_count[priclass]);
-
- return tl;
-}
-
-static pthread_priority_t
-pthread_priority_from_wq_class_index(struct workqueue *wq, int index){
- if (index == WORKQUEUE_EVENT_MANAGER_BUCKET){
- return wq->wq_event_manager_priority;
- } else {
- return class_index_get_pthread_priority(index);
- }
-}
-
-static void
-reset_priority(struct threadlist *tl, pthread_priority_t pri){
- kern_return_t ret;
- thread_t th = tl->th_thread;
-
- if ((pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
- ret = pthread_kern->thread_set_workq_qos(th, pthread_priority_get_thread_qos(pri), 0);
- assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
-
- if (tl->th_flags & TH_LIST_EVENT_MGR_SCHED_PRI) {
-
- /* Reset priority to default (masked by QoS) */
-
- ret = pthread_kern->thread_set_workq_pri(th, 31, POLICY_TIMESHARE);
- assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
-
- tl->th_flags &= ~TH_LIST_EVENT_MGR_SCHED_PRI;
- }
- } else {
- ret = pthread_kern->thread_set_workq_qos(th, THREAD_QOS_UNSPECIFIED, 0);
- assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
- ret = pthread_kern->thread_set_workq_pri(th, (pri & (~_PTHREAD_PRIORITY_FLAGS_MASK)), POLICY_TIMESHARE);
- assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
-
- tl->th_flags |= TH_LIST_EVENT_MGR_SCHED_PRI;
- }
-}
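-
-/*
- * In short (descriptive only): a QoS-based priority goes through
- * thread_set_workq_qos(); a raw scheduling priority (SCHED_PRI_FLAG set)
- * clears the QoS and sets the kernel priority directly, and
- * TH_LIST_EVENT_MGR_SCHED_PRI records that so a later QoS-based reset knows
- * to first restore the default priority.
- */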
-
-/**
- * grabs a thread for a request
- *
- * - called with the workqueue lock held...
- * - responsible for dropping it in all cases
- * - if provided mode is for overcommit, doesn't consume a reqcount
- *
- */
-static thread_t
-workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t thread,
- enum run_nextreq_mode mode, pthread_priority_t prio,
- bool kevent_bind_via_return)
-{
- thread_t th_to_run = THREAD_NULL;
- uint32_t upcall_flags = 0;
- uint32_t priclass;
- struct threadlist *tl = NULL;
- struct uthread *uth = NULL;
- boolean_t start_timer = FALSE;
-
- if (mode == RUN_NEXTREQ_ADD_TIMER) {
- mode = RUN_NEXTREQ_DEFAULT;
- }
-
- // valid modes to call this function with
- assert(mode == RUN_NEXTREQ_DEFAULT || mode == RUN_NEXTREQ_DEFAULT_KEVENT ||
- mode == RUN_NEXTREQ_OVERCOMMIT || mode == RUN_NEXTREQ_UNCONSTRAINED ||
- mode == RUN_NEXTREQ_EVENT_MANAGER || mode == RUN_NEXTREQ_OVERCOMMIT_KEVENT);
- // may only have a priority if in OVERCOMMIT or DEFAULT_KEVENT mode
- assert(mode == RUN_NEXTREQ_OVERCOMMIT || mode == RUN_NEXTREQ_OVERCOMMIT_KEVENT ||
- mode == RUN_NEXTREQ_DEFAULT_KEVENT || prio == 0);
- // thread == thread_null means "please spin up a new workqueue thread, we can't reuse this"
- // thread != thread_null is thread reuse, and must be the current thread
- assert(thread == THREAD_NULL || thread == current_thread());
-
- PTHREAD_TRACE_WQ(TRACE_wq_run_nextitem|DBG_FUNC_START, wq, thread_tid(thread), wq->wq_thidlecount, wq->wq_reqcount, 0);
-
- if (thread != THREAD_NULL) {
- uth = pthread_kern->get_bsdthread_info(thread);
-
- if ((tl = pthread_kern->uthread_get_threadlist(uth)) == NULL) {
- panic("wq thread with no threadlist");
- }
- }
-
- /*
- * from here until we drop the workq lock we can't be pre-empted since we
- * hold the lock in spin mode... this is important since we have to
- * independently update the priority that the thread is associated with and
- * the priority-based counters that "workqueue_callback" also changes and
- * bases decisions on.
- */
-
- /*
- * This giant monstrosity does three things:
- *
- * - adjusts the mode, if required
- * - selects the priclass that we'll be servicing
- * - sets any mode-specific upcall flags
- *
- * When possible special-cases should be handled here and converted into
- * non-special cases.
- */
- if (mode == RUN_NEXTREQ_OVERCOMMIT) {
- priclass = pthread_priority_get_class_index(prio);
- upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT;
- } else if (mode == RUN_NEXTREQ_OVERCOMMIT_KEVENT){
- priclass = pthread_priority_get_class_index(prio);
- upcall_flags |= WQ_FLAG_THREAD_KEVENT;
- } else if (mode == RUN_NEXTREQ_EVENT_MANAGER){
- assert(wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0);
- priclass = WORKQUEUE_EVENT_MANAGER_BUCKET;
- upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER;
- if (wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET]){
- upcall_flags |= WQ_FLAG_THREAD_KEVENT;
- }
- } else if (wq->wq_reqcount == 0){
- // no work to do. we'll check again when new work arrives.
- goto done;
- } else if (mode == RUN_NEXTREQ_DEFAULT_KEVENT) {
- assert(kevent_bind_via_return);
-
- priclass = pthread_priority_get_class_index(prio);
- assert(priclass < WORKQUEUE_EVENT_MANAGER_BUCKET);
- assert(wq->wq_kevent_requests[priclass] > 0);
-
- upcall_flags |= WQ_FLAG_THREAD_KEVENT;
- mode = RUN_NEXTREQ_DEFAULT;
- } else if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] &&
- ((wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0) ||
- (thread != THREAD_NULL && tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))){
- // There's an event manager request and either:
- // - no event manager currently running
- // - we are re-using the event manager
- mode = RUN_NEXTREQ_EVENT_MANAGER;
- priclass = WORKQUEUE_EVENT_MANAGER_BUCKET;
- upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER;
- if (wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET]){
- upcall_flags |= WQ_FLAG_THREAD_KEVENT;
- }
- } else {
- // Find highest priority and check for special request types
- for (priclass = 0; priclass < WORKQUEUE_EVENT_MANAGER_BUCKET; priclass++) {
- if (wq->wq_requests[priclass])
- break;
- }
- if (priclass == WORKQUEUE_EVENT_MANAGER_BUCKET){
- // the only request should have been the event manager's, since it's not in a bucket,
- // but we weren't able to handle it since there's already an event manager running,
- // so we fell into this case
- assert(wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] == 1 &&
- wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 1 &&
- wq->wq_reqcount == 1);
- goto done;
- }
-
- if (wq->wq_kevent_ocrequests[priclass]){
- mode = RUN_NEXTREQ_DEFERRED_OVERCOMMIT;
- upcall_flags |= WQ_FLAG_THREAD_KEVENT;
- upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT;
- } else if (wq->wq_ocrequests[priclass]){
- mode = RUN_NEXTREQ_DEFERRED_OVERCOMMIT;
- upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT;
- } else if (wq->wq_kevent_requests[priclass]){
- upcall_flags |= WQ_FLAG_THREAD_KEVENT;
- }
- }
-
- assert(mode != RUN_NEXTREQ_EVENT_MANAGER || priclass == WORKQUEUE_EVENT_MANAGER_BUCKET);
- assert(mode == RUN_NEXTREQ_EVENT_MANAGER || priclass != WORKQUEUE_EVENT_MANAGER_BUCKET);
-
- if (mode == RUN_NEXTREQ_DEFAULT /* non-overcommit */){
- uint32_t my_priclass = (thread != THREAD_NULL) ? tl->th_priority : WORKQUEUE_NUM_BUCKETS;
- if (may_start_constrained_thread(wq, priclass, my_priclass, &start_timer) == FALSE){
- // per policy, we won't start another constrained thread
- goto done;
- }
- }
-
- if (thread != THREAD_NULL) {
- /*
- * thread is non-NULL here when we return from userspace
- * in workq_kernreturn; rather than trying to find a thread,
- * we pick up new work for this specific thread.
- */
- th_to_run = thread;
- upcall_flags |= WQ_FLAG_THREAD_REUSE;
- } else if (wq->wq_thidlecount == 0) {
- /*
- * we have no additional threads waiting to pick up
- * work; however, there is additional work to do.
- */
- start_timer = WQ_TIMER_DELAYED_NEEDED(wq);
-
- PTHREAD_TRACE_WQ(TRACE_wq_stalled, wq, wq->wq_nthreads, start_timer, 0, 0);
-
- goto done;
- } else {
- // there is both work available and an idle thread, so activate a thread
- tl = pop_from_thidlelist(wq, priclass);
- th_to_run = tl->th_thread;
- }
-
- // Adjust counters and thread flags AKA consume the request
- // TODO: It would be lovely if OVERCOMMIT consumed reqcount
- switch (mode) {
- case RUN_NEXTREQ_DEFAULT:
- case RUN_NEXTREQ_DEFAULT_KEVENT: /* actually mapped to DEFAULT above */
- case RUN_NEXTREQ_ADD_TIMER: /* actually mapped to DEFAULT above */
- case RUN_NEXTREQ_UNCONSTRAINED:
- wq->wq_reqcount--;
- wq->wq_requests[priclass]--;
-
- if (mode == RUN_NEXTREQ_DEFAULT){
- if (!(tl->th_flags & TH_LIST_CONSTRAINED)) {
- wq->wq_constrained_threads_scheduled++;
- tl->th_flags |= TH_LIST_CONSTRAINED;
- }
- } else if (mode == RUN_NEXTREQ_UNCONSTRAINED){
- if (tl->th_flags & TH_LIST_CONSTRAINED) {
- wq->wq_constrained_threads_scheduled--;
- tl->th_flags &= ~TH_LIST_CONSTRAINED;
- }
- }
- if (upcall_flags & WQ_FLAG_THREAD_KEVENT){
- wq->wq_kevent_requests[priclass]--;
- }
- break;
-
- case RUN_NEXTREQ_EVENT_MANAGER:
- wq->wq_reqcount--;
- wq->wq_requests[priclass]--;
-
- if (tl->th_flags & TH_LIST_CONSTRAINED) {
- wq->wq_constrained_threads_scheduled--;
- tl->th_flags &= ~TH_LIST_CONSTRAINED;
- }
- if (upcall_flags & WQ_FLAG_THREAD_KEVENT){
- wq->wq_kevent_requests[priclass]--;
- }
- break;
-
- case RUN_NEXTREQ_DEFERRED_OVERCOMMIT:
- wq->wq_reqcount--;
- wq->wq_requests[priclass]--;
- if (upcall_flags & WQ_FLAG_THREAD_KEVENT){
- wq->wq_kevent_ocrequests[priclass]--;
- } else {
- wq->wq_ocrequests[priclass]--;
- }
- /* FALLTHROUGH */
- case RUN_NEXTREQ_OVERCOMMIT:
- case RUN_NEXTREQ_OVERCOMMIT_KEVENT:
- if (tl->th_flags & TH_LIST_CONSTRAINED) {
- wq->wq_constrained_threads_scheduled--;
- tl->th_flags &= ~TH_LIST_CONSTRAINED;
- }
- break;
- }
-
- // Confirm we've maintained our counter invariants
- assert(wq->wq_requests[priclass] < UINT16_MAX);
- assert(wq->wq_ocrequests[priclass] < UINT16_MAX);
- assert(wq->wq_kevent_requests[priclass] < UINT16_MAX);
- assert(wq->wq_kevent_ocrequests[priclass] < UINT16_MAX);
- assert(wq->wq_ocrequests[priclass] + wq->wq_kevent_requests[priclass] +
- wq->wq_kevent_ocrequests[priclass] <=
- wq->wq_requests[priclass]);
-
- assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
- if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
- tl->th_flags |= TH_LIST_KEVENT;
- } else {
- tl->th_flags &= ~TH_LIST_KEVENT;
- }
-
- uint32_t orig_class = tl->th_priority;
- tl->th_priority = (uint8_t)priclass;
-
- if ((thread != THREAD_NULL) && (orig_class != priclass)) {
- /*
- * we need to adjust these counters based on this
- * thread's new disposition w/r to priority
- */
- OSAddAtomic(-1, &wq->wq_thactive_count[orig_class]);
- OSAddAtomic(1, &wq->wq_thactive_count[priclass]);
-
- wq->wq_thscheduled_count[orig_class]--;
- wq->wq_thscheduled_count[priclass]++;
- }
- wq->wq_thread_yielded_count = 0;
-
- pthread_priority_t outgoing_priority = pthread_priority_from_wq_class_index(wq, tl->th_priority);
- PTHREAD_TRACE_WQ(TRACE_wq_reset_priority | DBG_FUNC_START, wq, thread_tid(tl->th_thread), outgoing_priority, 0, 0);
- reset_priority(tl, outgoing_priority);
- PTHREAD_TRACE_WQ(TRACE_wq_reset_priority | DBG_FUNC_END, wq, thread_tid(tl->th_thread), outgoing_priority, 0, 0);
-
- /*
- * persist upcall_flags so that it can be retrieved in setup_wqthread
- */
- tl->th_upcall_flags = upcall_flags >> WQ_FLAG_THREAD_PRIOSHIFT;
-
- /*
- * if the current thread is reused for the work request, this does not return via unix_syscall
- */
- wq_runreq(p, th_to_run, wq, tl, (thread == th_to_run),
- (upcall_flags & WQ_FLAG_THREAD_KEVENT) && !kevent_bind_via_return);
-
- PTHREAD_TRACE_WQ(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(th_to_run), mode == RUN_NEXTREQ_OVERCOMMIT, 1, 0);
-
- assert(!kevent_bind_via_return || (upcall_flags & WQ_FLAG_THREAD_KEVENT));
- if (kevent_bind_via_return && (upcall_flags & WQ_FLAG_THREAD_KEVENT)) {
- tl->th_flags |= TH_LIST_KEVENT_BOUND;
- }
-
- workqueue_unlock(wq);
-
- return th_to_run;
-
-done:
- if (start_timer)
- workqueue_interval_timer_start(wq);
-
- PTHREAD_TRACE_WQ(TRACE_wq_run_nextitem | DBG_FUNC_END, wq, thread_tid(thread), start_timer, 3, 0);
-
- if (thread != THREAD_NULL){
- parkit(wq, tl, thread);
- /* NOT REACHED */
- }
-
- workqueue_unlock(wq);
-
- return THREAD_NULL;
-}
-
-/**
- * parked thread wakes up
- */
-static void __dead2
-wq_unpark_continue(void* __unused ptr, wait_result_t wait_result)
-{
- boolean_t first_use = false;
- thread_t th = current_thread();
- proc_t p = current_proc();
-
- struct uthread *uth = pthread_kern->get_bsdthread_info(th);
- if (uth == NULL) goto done;
-
- struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
- if (tl == NULL) goto done;
-
- struct workqueue *wq = tl->th_workq;
-
- workqueue_lock_spin(wq);
-
- assert(tl->th_flags & TH_LIST_INITED);
-
- if ((tl->th_flags & TH_LIST_NEW)){
- tl->th_flags &= ~(TH_LIST_NEW);
- first_use = true;
- }
-
- if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
- /*
- * The normal wakeup path.
- */
- goto return_to_user;
- }
-
- if ((tl->th_flags & TH_LIST_RUNNING) == 0 &&
- wait_result == THREAD_TIMED_OUT &&
- tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET &&
- TAILQ_FIRST(&wq->wq_thidlemgrlist) == tl &&
- TAILQ_NEXT(tl, th_entry) == NULL){
- /*
- * If we are the only idle manager and we popped for self-destruction,
- * then don't actually exit. Instead, free our stack to save some
- * memory and re-park.
- */
-
- workqueue_unlock(wq);
-
- vm_map_t vmap = wq->wq_map;
-
- // Keep this in sync with _setup_wqthread()
- const vm_size_t guardsize = vm_map_page_size(vmap);
- const user_addr_t freeaddr = (user_addr_t)tl->th_stackaddr + guardsize;
- const vm_map_offset_t freesize = vm_map_trunc_page_mask((PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1, vm_map_page_mask(vmap)) - guardsize;
-
- int kr;
- kr = mach_vm_behavior_set(vmap, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
- assert(kr == KERN_SUCCESS || kr == KERN_INVALID_ADDRESS);
-
- workqueue_lock_spin(wq);
-
- if ( !(tl->th_flags & TH_LIST_RUNNING)) {
- thread_set_pending_block_hint(th, kThreadWaitParkedWorkQueue);
- assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE));
-
- workqueue_unlock(wq);
-
- thread_block(wq_unpark_continue);
- /* NOT REACHED */
- }
- }
-
- if ((tl->th_flags & TH_LIST_RUNNING) == 0) {
- assert((tl->th_flags & TH_LIST_BUSY) == 0);
- /*
- * We were set running, but not for the purposes of actually running.
- * This could be because the timer elapsed. Or it could be because the
- * thread aborted. Either way, we need to return to userspace to exit.
- *
- * The call to workqueue_removethread will consume the lock.
- */
-
- if (!first_use &&
- (tl->th_priority < qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS) ||
- (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) {
- // Reset the QoS to something low for the pthread cleanup
- pthread_priority_t cleanup_pri = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0);
- reset_priority(tl, cleanup_pri);
- }
-
- workqueue_removethread(tl, 0, first_use);
-
- if (first_use){
- pthread_kern->thread_bootstrap_return();
- } else {
- pthread_kern->unix_syscall_return(0);
- }
- /* NOT REACHED */
- }
-
- /*
- * The timer woke us up or the thread was aborted. However, we have
- * already started to make this a runnable thread. Wait for that to
- * finish, then continue to userspace.
- */
- while ((tl->th_flags & TH_LIST_BUSY)) {
- assert_wait((caddr_t)tl, (THREAD_UNINT));
-
- workqueue_unlock(wq);
-
- thread_block(THREAD_CONTINUE_NULL);
-
- workqueue_lock_spin(wq);
- }
-
-return_to_user:
- workqueue_unlock(wq);
- _setup_wqthread(p, th, wq, tl, first_use);
- pthread_kern->thread_sched_call(th, workqueue_callback);
-done:
- if (first_use){
- pthread_kern->thread_bootstrap_return();
- } else {
- pthread_kern->unix_syscall_return(EJUSTRETURN);
- }
- panic("Our attempt to return to userspace failed...");
-}
-
-/* called with workqueue lock held */
-static void
-wq_runreq(proc_t p, thread_t th, struct workqueue *wq, struct threadlist *tl,
- boolean_t return_directly, boolean_t needs_kevent_bind)
-{
- PTHREAD_TRACE1_WQ(TRACE_wq_runitem | DBG_FUNC_START, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th));
-
- unsigned int kevent_flags = KEVENT_FLAG_WORKQ;
- if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
- kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER;
- }
-
- if (return_directly) {
- if (needs_kevent_bind) {
- assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
- tl->th_flags |= TH_LIST_KEVENT_BOUND;
- }
-
- workqueue_unlock(wq);
-
- if (needs_kevent_bind) {
- kevent_qos_internal_bind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags);
- }
-
- /*
- * For preemption reasons, we want to reset the voucher as late as
- * possible, so we do it in two places:
- * - Just before parking (i.e. in parkit())
- * - Prior to doing the setup for the next workitem (i.e. here)
- *
- * Those two places are sufficient to ensure we always reset it before
- * it goes back out to user space, but be careful to not break that
- * guarantee.
- */
- kern_return_t kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
- assert(kr == KERN_SUCCESS);
-
- _setup_wqthread(p, th, wq, tl, false);
-
- PTHREAD_TRACE_WQ(TRACE_wq_run_nextitem|DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0);
-
- pthread_kern->unix_syscall_return(EJUSTRETURN);
- /* NOT REACHED */
- }
-
- if (needs_kevent_bind) {
- // Leave TH_LIST_BUSY set so that the thread can't beat us to calling kevent
- workqueue_unlock(wq);
- assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
- kevent_qos_internal_bind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags);
- tl->th_flags |= TH_LIST_KEVENT_BOUND;
- workqueue_lock_spin(wq);
- }
- tl->th_flags &= ~(TH_LIST_BUSY);
- thread_wakeup_thread(tl,th);
-}
-
-#define KEVENT_LIST_LEN 16 // WORKQ_KEVENT_EVENT_BUFFER_LEN
-#define KEVENT_DATA_SIZE (32 * 1024)
+ workq_thread_set_top_addr(th_addrs, data_buf + data_available);
+ *kevent_list_out = kevent_list;
+ return ret;
+}
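+/*
+ * workq_kevent() above gathers pending kevents into a list and data buffer
+ * carved out of the top of the workqueue thread's stack, lowers the recorded
+ * stack top beneath them, and returns the in-stack list and count that the
+ * register-setup code below hands up to userspace.
+ */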
/**
 * configures the initial thread stack and registers for the jump into the
 * userspace wqthread start routine. When we are done the stack will look like:
* |-----------| th_stackaddr + th_allocsize
 * |pthread_t | th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET
- * |kevent list| optionally - at most KEVENT_LIST_LEN events
- * |kevent data| optionally - at most KEVENT_DATA_SIZE bytes
- * |stack gap | bottom aligned to 16 bytes, and at least as big as stack_gap_min
+ * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events
+ * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes
+ * |stack gap | bottom aligned to 16 bytes
* | STACK |
* | ⇓ |
* | |
* |guard page | guardsize
* |-----------| th_stackaddr
*/
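/*
 * Sketch of the address math implied by the layout above, using the
 * PTH_DEFAULT_STACKSIZE / PTHREAD_T_OFFSET constants from the old
 * _setup_wqthread() path; workq_thread_get_addrs() is assumed to compute
 * the equivalent:
 *
 *   const vm_size_t guardsize = vm_map_page_size(map);
 *   user_addr_t self          = stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET;
 *   user_addr_t stack_bottom  = stackaddr + guardsize;
 *   user_addr_t stack_top     = (self - stack_gap_min) & -stack_align_min;
 *
 * where stack_gap_min / stack_align_min are the ABI red-zone and alignment
 * values (C_64_REDZONE_LEN / C_64_STK_ALIGN, or the 32-bit equivalents), and
 * stack_top is lowered further if a kevent list/data buffer is carved out.
 */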
+__attribute__((noreturn,noinline))
void
-_setup_wqthread(proc_t p, thread_t th, struct workqueue *wq, struct threadlist *tl,
- bool first_use)
+workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
+ mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags)
{
- int error;
- uint32_t upcall_flags;
-
- pthread_priority_t priority = pthread_priority_from_wq_class_index(wq, tl->th_priority);
-
- const vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map);
- const vm_size_t stack_gap_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_REDZONE_LEN;
- const vm_size_t stack_align_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_STK_ALIGN;
-
- user_addr_t pthread_self_addr = (user_addr_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET);
- user_addr_t stack_top_addr = (user_addr_t)((pthread_self_addr - stack_gap_min) & -stack_align_min);
- user_addr_t stack_bottom_addr = (user_addr_t)(tl->th_stackaddr + guardsize);
-
- user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
- if (!wqstart_fnptr) {
- panic("workqueue thread start function pointer is NULL");
- }
-
-	/* Put the QoS class value into the lower bits of the reuse_thread register; this is where
- * the thread priority used to be stored anyway.
- */
- upcall_flags = tl->th_upcall_flags << WQ_FLAG_THREAD_PRIOSHIFT;
- upcall_flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK);
-
- upcall_flags |= WQ_FLAG_THREAD_NEWSPI;
-
- uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
- if (tsd_offset) {
- mach_vm_offset_t th_tsd_base = (mach_vm_offset_t)pthread_self_addr + tsd_offset;
- kern_return_t kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
- if (kret == KERN_SUCCESS) {
- upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
- }
- }
-
- if (first_use) {
- /*
- * Pre-fault the first page of the new thread's stack and the page that will
- * contain the pthread_t structure.
- */
- vm_map_t vmap = pthread_kern->current_map();
- if (vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
- vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap))){
- vm_fault( vmap,
- vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
- VM_PROT_READ | VM_PROT_WRITE,
- FALSE,
- THREAD_UNINT, NULL, 0);
- }
- vm_fault( vmap,
- vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap)),
- VM_PROT_READ | VM_PROT_WRITE,
- FALSE,
- THREAD_UNINT, NULL, 0);
- } else {
- upcall_flags |= WQ_FLAG_THREAD_REUSE;
- }
-
+ struct workq_thread_addrs th_addrs;
+ bool first_use = (setup_flags & WQ_SETUP_FIRST_USE);
user_addr_t kevent_list = NULL;
int kevent_count = 0;
- if (upcall_flags & WQ_FLAG_THREAD_KEVENT){
- kevent_list = pthread_self_addr - KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
- kevent_count = KEVENT_LIST_LEN;
- user_addr_t kevent_data_buf = kevent_list - KEVENT_DATA_SIZE;
- user_size_t kevent_data_available = KEVENT_DATA_SIZE;
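+	/* Work out this thread's pthread_t and stack addresses from its stack base. */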
+ workq_thread_get_addrs(map, stackaddr, &th_addrs);
- int32_t events_out = 0;
-
- assert(tl->th_flags | TH_LIST_KEVENT_BOUND);
- unsigned int flags = KEVENT_FLAG_WORKQ | KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
- if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
- flags |= KEVENT_FLAG_WORKQ_MANAGER;
- }
- int ret = kevent_qos_internal(p, class_index_get_thread_qos(tl->th_priority), NULL, 0, kevent_list, kevent_count,
- kevent_data_buf, &kevent_data_available,
- flags, &events_out);
-
- // turns out there are a lot of edge cases where this will fail, so not enabled by default
- //assert((ret == KERN_SUCCESS && events_out != -1) || ret == KERN_ABORTED);
-
-		// squash any errors into just empty output on error
- if (ret != KERN_SUCCESS || events_out == -1){
- events_out = 0;
- kevent_data_available = KEVENT_DATA_SIZE;
- }
-
- // We shouldn't get data out if there aren't events available
- assert(events_out != 0 || kevent_data_available == KEVENT_DATA_SIZE);
-
- if (events_out > 0){
- if (kevent_data_available == KEVENT_DATA_SIZE){
- stack_top_addr = (kevent_list - stack_gap_min) & -stack_align_min;
- } else {
- stack_top_addr = (kevent_data_buf + kevent_data_available - stack_gap_min) & -stack_align_min;
+ if (first_use) {
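+		/*
+		 * Tell the kernel thread where its userspace TSD area lives (at a
+		 * fixed offset inside the pthread_t); on success, let userspace know
+		 * via WQ_FLAG_THREAD_TSD_BASE_SET.
+		 */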
+ uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
+ if (tsd_offset) {
+ mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset;
+ kern_return_t kret = pthread_kern->thread_set_tsd_base(th,
+ th_tsd_base);
+ if (kret == KERN_SUCCESS) {
+ upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
}
-
- kevent_count = events_out;
- } else {
- kevent_list = NULL;
- kevent_count = 0;
- }
- }
-
-#if defined(__i386__) || defined(__x86_64__)
- if (proc_is64bit(p) == 0) {
- x86_thread_state32_t state = {
- .eip = (unsigned int)wqstart_fnptr,
- .eax = /* arg0 */ (unsigned int)pthread_self_addr,
- .ebx = /* arg1 */ (unsigned int)tl->th_thport,
- .ecx = /* arg2 */ (unsigned int)stack_bottom_addr,
- .edx = /* arg3 */ (unsigned int)kevent_list,
- .edi = /* arg4 */ (unsigned int)upcall_flags,
- .esi = /* arg5 */ (unsigned int)kevent_count,
-
- .esp = (int)((vm_offset_t)stack_top_addr),
- };
-
- error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
- if (error != KERN_SUCCESS) {
- panic(__func__ ": thread_set_wq_state failed: %d", error);
}
- } else {
- x86_thread_state64_t state64 = {
- // x86-64 already passes all the arguments in registers, so we just put them in their final place here
- .rip = (uint64_t)wqstart_fnptr,
- .rdi = (uint64_t)pthread_self_addr,
- .rsi = (uint64_t)tl->th_thport,
- .rdx = (uint64_t)stack_bottom_addr,
- .rcx = (uint64_t)kevent_list,
- .r8 = (uint64_t)upcall_flags,
- .r9 = (uint64_t)kevent_count,
- .rsp = (uint64_t)(stack_top_addr)
- };
-
- error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
- if (error != KERN_SUCCESS) {
- panic(__func__ ": thread_set_wq_state failed: %d", error);
+ /*
+ * Pre-fault the first page of the new thread's stack and the page that will
+ * contain the pthread_t structure.
+ */
+ vm_map_offset_t mask = vm_map_page_mask(map);
+ vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask);
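+		/* the -1 makes a page-aligned stack top fault the page the first frame will actually use, not the untouched page above it */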
+ vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask);
+ if (th_page != stk_page) {
+ vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
}
+ vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);
}
-#else
-#error setup_wqthread not defined for this architecture
-#endif
-}
-
-#if DEBUG
-static int wq_kevent_test SYSCTL_HANDLER_ARGS {
- //(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
-#pragma unused(oidp, arg1, arg2)
- int error;
- struct workq_reqthreads_req_s requests[64] = {};
-
- if (req->newlen > sizeof(requests) || req->newlen < sizeof(struct workq_reqthreads_req_s))
- return EINVAL;
-
- error = copyin(req->newptr, requests, req->newlen);
- if (error) return error;
-
- _workq_reqthreads(req->p, (int)(req->newlen / sizeof(struct workq_reqthreads_req_s)), requests);
-
- return 0;
-}
-#endif // DEBUG
-
-#pragma mark - Misc
-
-int
-_fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
-{
- struct workqueue * wq;
- int error = 0;
- int activecount;
- uint32_t pri;
-
- if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
- return EINVAL;
- }
-
- workqueue_lock_spin(wq);
- activecount = 0;
- for (pri = 0; pri < WORKQUEUE_NUM_BUCKETS; pri++) {
- activecount += wq->wq_thactive_count[pri];
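+	/*
+	 * Threads set up only to exit report WORKQ_EXIT_THREAD_NKEVENT as their
+	 * kevent count; kevent-servicing threads instead have any pending events
+	 * copied onto their stack here so they travel up with the initial
+	 * register state.
+	 */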
+ if (setup_flags & WQ_SETUP_EXIT_THREAD) {
+ kevent_count = WORKQ_EXIT_THREAD_NKEVENT;
+ } else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
+ unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
+ workq_kevent(p, &th_addrs, NULL, 0, flags, &kevent_list, &kevent_count);
}
- pwqinfo->pwq_nthreads = wq->wq_nthreads;
- pwqinfo->pwq_runthreads = activecount;
- pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
- pwqinfo->pwq_state = 0;
- if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
- pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
- }
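+	/*
+	 * Marshal the wqthread start-routine arguments into the thread's initial
+	 * register state (the job the per-architecture thread_set_wq_state code
+	 * above used to do inline).
+	 */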
+ workq_set_register_state(p, th, &th_addrs, kport,
+ kevent_list, upcall_flags, kevent_count);
- if (wq->wq_nthreads >= wq_max_threads) {
- pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
+ if (first_use) {
+ pthread_kern->thread_bootstrap_return();
+ } else {
+ pthread_kern->unix_syscall_return(EJUSTRETURN);
}
-
- workqueue_unlock(wq);
- return(error);
+ __builtin_unreachable();
}
-uint32_t
-_get_pwq_state_kdp(proc_t p)
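+/*
+ * Handle kevents that a parking workqueue thread still has on its stack: push
+ * them back through workq_kevent(), and if events remain to be delivered,
+ * clear the voucher and reload the register state so the thread heads straight
+ * back to userspace with the new list instead of parking.
+ */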
+int
+workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
+ user_addr_t stackaddr, mach_port_name_t kport,
+ user_addr_t events, int nevents, int upcall_flags)
{
- if (p == NULL) {
- return 0;
- }
-
- struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
+ struct workq_thread_addrs th_addrs;
+ user_addr_t kevent_list = NULL;
+ int kevent_count = 0, error;
+ __assert_only kern_return_t kr;
- if (wq == NULL || workqueue_lock_spin_is_acquired_kdp(wq)) {
- return 0;
- }
+ workq_thread_get_addrs(map, stackaddr, &th_addrs);
- uint32_t pwq_state = WQ_FLAGS_AVAILABLE;
+ unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE |
+ KEVENT_FLAG_PARKING;
+ error = workq_kevent(p, &th_addrs, events, nevents, flags,
+ &kevent_list, &kevent_count);
- if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
- pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
+ if (error || kevent_count == 0) {
+ return error;
}
- if (wq->wq_nthreads >= wq_max_threads) {
- pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
- }
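+	/* Drop any adopted voucher before the thread returns to userspace. */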
+ kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
+ assert(kr == KERN_SUCCESS);
- return pwq_state;
+ workq_set_register_state(p, th, &th_addrs, kport,
+ kevent_list, upcall_flags, kevent_count);
+
+ pthread_kern->unix_syscall_return(EJUSTRETURN);
+ __builtin_unreachable();
}
-int
+int
_thread_selfid(__unused struct proc *p, uint64_t *retval)
{
thread_t thread = current_thread();
{
pthread_lck_grp_attr = lck_grp_attr_alloc_init();
pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);
-
+
/*
* allocate the lock attribute for pthread synchronizers
*/
pthread_lck_attr = lck_attr_alloc_init();
-
pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
-
+
pth_global_hashinit();
psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
psynch_zoneinit();
- /*
- * register sysctls
- */
- sysctl_register_oid(&sysctl__kern_wq_yielded_threshold);
- sysctl_register_oid(&sysctl__kern_wq_yielded_window_usecs);
- sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs);
- sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs);
- sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs);
- sysctl_register_oid(&sysctl__kern_wq_max_threads);
- sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads);
- sysctl_register_oid(&sysctl__kern_pthread_debug_tracing);
-
-#if DEBUG
- sysctl_register_oid(&sysctl__kern_wq_max_concurrency);
- sysctl_register_oid(&sysctl__debug_wq_kevent_test);
-#endif
-
- wq_max_concurrency = pthread_kern->ml_get_max_cpus();
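+	/*
+	 * Allow the default pthread mutex policy to be overridden with the
+	 * pthread_mutex_default_policy boot-arg and expose it through the
+	 * kern.pthread_mutex_default_policy sysctl.
+	 */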
+ int policy_bootarg;
+ if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) {
+ pthread_mutex_default_policy = policy_bootarg;
+ }
+ sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy);
}