From 214d78a2e71d35948bb3c390fec58031c3f0611b Mon Sep 17 00:00:00 2001 From: Apple Date: Thu, 4 Oct 2018 22:01:40 +0000 Subject: [PATCH] libpthread-330.201.1.tar.gz --- kern/kern_init.c | 20 +- kern/kern_internal.h | 153 +- kern/kern_policy.c | 182 -- kern/kern_support.c | 3837 +++----------------------- kern/kern_synch.c | 1550 ++++++----- kern/kern_trace.h | 34 +- kern/synch_internal.h | 15 +- kern/workqueue_internal.h | 181 +- libpthread.xcodeproj/project.pbxproj | 66 +- lldbmacros/init.py | 310 +++ lldbmacros/pthread.py | 152 - man/pthread_mutexattr.3 | 83 + private/dependency_private.h | 164 ++ private/private.h | 16 +- private/qos_private.h | 43 +- private/tsd_private.h | 5 + private/workqueue_private.h | 8 + pthread/introspection.h | 38 +- pthread/pthread.h | 13 + pthread/pthread_spis.h | 14 +- pthread/stack_np.h | 72 + src/internal.h | 253 +- src/offsets.h | 58 + src/pthread.c | 2181 ++++++++------- src/pthread_asm.s | 97 + src/pthread_cancelable.c | 418 +-- src/pthread_cond.c | 76 +- src/pthread_dependency.c | 110 + src/pthread_mutex.c | 962 +++++-- src/pthread_rwlock.c | 6 +- src/pthread_tsd.c | 33 +- src/qos.c | 289 +- src/thread_setup.c | 67 +- tests/Makefile | 12 +- tests/cond_prepost.c | 217 ++ tests/main_stack_custom.c | 2 +- tests/mutex.c | 5 +- tests/mutex_prepost.c | 157 ++ tests/perf_contended_mutex_rwlock.c | 519 ++++ tests/pthread_dependency.c | 78 + tests/pthread_threadid_np.c | 2 +- tests/rdar_32848402.c | 4 +- tests/stack.c | 82 + tests/stack_aslr.c | 4 +- tests/stack_size.c | 81 + tools/locktrace.lua | 273 +- tools/wqtrace.lua | 349 --- xcodescripts/install-lldbmacros.sh | 15 +- xcodescripts/kext.xcconfig | 2 +- xcodescripts/pthread.dirty | 33 + xcodescripts/pthread.xcconfig | 6 +- xcodescripts/resolved.xcconfig | 2 +- 52 files changed, 6172 insertions(+), 7177 deletions(-) delete mode 100644 kern/kern_policy.c create mode 100644 lldbmacros/init.py delete mode 100644 lldbmacros/pthread.py create mode 100644 private/dependency_private.h create mode 100644 pthread/stack_np.h create mode 100644 src/offsets.h create mode 100644 src/pthread_dependency.c create mode 100644 tests/cond_prepost.c create mode 100644 tests/mutex_prepost.c create mode 100644 tests/perf_contended_mutex_rwlock.c create mode 100644 tests/pthread_dependency.c create mode 100644 tests/stack.c create mode 100644 tests/stack_size.c delete mode 100755 tools/wqtrace.lua create mode 100644 xcodescripts/pthread.dirty diff --git a/kern/kern_init.c b/kern/kern_init.c index 3de9b5d..3321483 100644 --- a/kern/kern_init.c +++ b/kern/kern_init.c @@ -17,21 +17,12 @@ pthread_callbacks_t pthread_kern; const struct pthread_functions_s pthread_internal_functions = { .pthread_init = _pthread_init, - .fill_procworkqueue = (int(*)(proc_t, void*))_fill_procworkqueue, - .get_pwq_state_kdp = _get_pwq_state_kdp, - .workqueue_exit = _workqueue_exit, - .workqueue_mark_exiting = _workqueue_mark_exiting, - .workqueue_thread_yielded = _workqueue_thread_yielded, - .workqueue_get_sched_callback = _workqueue_get_sched_callback, .pth_proc_hashinit = _pth_proc_hashinit, .pth_proc_hashdelete = _pth_proc_hashdelete, .bsdthread_create = _bsdthread_create, .bsdthread_register = _bsdthread_register, .bsdthread_terminate = _bsdthread_terminate, - .bsdthread_ctl = _bsdthread_ctl, .thread_selfid = _thread_selfid, - .workq_kernreturn = _workq_kernreturn, - .workq_open = _workq_open, .psynch_mutexwait = _psynch_mutexwait, .psynch_mutexdrop = _psynch_mutexdrop, @@ -48,12 +39,11 @@ const struct pthread_functions_s pthread_internal_functions = 
{ .pthread_find_owner = _pthread_find_owner, .pthread_get_thread_kwq = _pthread_get_thread_kwq, - .workq_reqthreads = _workq_reqthreads, - .thread_qos_from_pthread_priority = _thread_qos_from_pthread_priority, - .pthread_priority_canonicalize2 = _pthread_priority_canonicalize, - .workq_thread_has_been_unbound = _workq_thread_has_been_unbound, - .workq_threadreq = workq_kern_threadreq, - .workq_threadreq_modify = workq_kern_threadreq_modify, + .workq_create_threadstack = workq_create_threadstack, + .workq_destroy_threadstack = workq_destroy_threadstack, + .workq_setup_thread = workq_setup_thread, + .workq_handle_stack_events = workq_handle_stack_events, + .workq_markfree_threadstack = workq_markfree_threadstack, }; kern_return_t pthread_start(__unused kmod_info_t * ki, __unused void *d) diff --git a/kern/kern_internal.h b/kern/kern_internal.h index fa2c27b..bb29cdc 100644 --- a/kern/kern_internal.h +++ b/kern/kern_internal.h @@ -29,7 +29,12 @@ #ifndef _SYS_PTHREAD_INTERNAL_H_ #define _SYS_PTHREAD_INTERNAL_H_ +#include +#include +#include + #ifdef KERNEL +struct ksyn_waitq_element; #include #include #include @@ -64,101 +69,6 @@ #define PTHREAD_FEATURE_WORKLOOP 0x80 /* supports workloops */ #define PTHREAD_FEATURE_QOS_DEFAULT 0x40000000 /* the kernel supports QOS_CLASS_DEFAULT */ -/* pthread bsdthread_ctl sysctl commands */ -#define BSDTHREAD_CTL_SET_QOS 0x10 /* bsdthread_ctl(BSDTHREAD_CTL_SET_QOS, thread_port, tsd_entry_addr, 0) */ -#define BSDTHREAD_CTL_GET_QOS 0x20 /* bsdthread_ctl(BSDTHREAD_CTL_GET_QOS, thread_port, 0, 0) */ -#define BSDTHREAD_CTL_QOS_OVERRIDE_START 0x40 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_START, thread_port, priority, 0) */ -#define BSDTHREAD_CTL_QOS_OVERRIDE_END 0x80 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_END, thread_port, 0, 0) */ -#define BSDTHREAD_CTL_SET_SELF 0x100 /* bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, priority, voucher, flags) */ -#define BSDTHREAD_CTL_QOS_OVERRIDE_RESET 0x200 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_RESET, 0, 0, 0) */ -#define BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH 0x400 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH, thread_port, priority, 0) */ -#define BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD 0x401 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD, thread_port, priority, resource) */ -#define BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET 0x402 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET, 0|1 (?reset_all), resource, 0) */ -#define BSDTHREAD_CTL_QOS_MAX_PARALLELISM 0x800 /* bsdthread_ctl(BSDTHREAD_CTL_QOS_MAX_PARALLELISM, priority, flags, 0) */ - -/* qos_class_t is mapped into one of these bits in the bitfield, this mapping now exists here because - * libdispatch requires the QoS class mask of the pthread_priority_t to be a bitfield. 
- */ -#define __PTHREAD_PRIORITY_CBIT_USER_INTERACTIVE 0x20 -#define __PTHREAD_PRIORITY_CBIT_USER_INITIATED 0x10 -#define __PTHREAD_PRIORITY_CBIT_DEFAULT 0x8 -#define __PTHREAD_PRIORITY_CBIT_UTILITY 0x4 -#define __PTHREAD_PRIORITY_CBIT_BACKGROUND 0x2 -#define __PTHREAD_PRIORITY_CBIT_MAINTENANCE 0x1 -#define __PTHREAD_PRIORITY_CBIT_UNSPECIFIED 0x0 - -static inline int -_pthread_qos_class_to_thread_qos(qos_class_t qos) -{ - switch (qos) { - case QOS_CLASS_USER_INTERACTIVE: return THREAD_QOS_USER_INTERACTIVE; - case QOS_CLASS_USER_INITIATED: return THREAD_QOS_USER_INITIATED; - case QOS_CLASS_DEFAULT: return THREAD_QOS_LEGACY; - case QOS_CLASS_UTILITY: return THREAD_QOS_UTILITY; - case QOS_CLASS_BACKGROUND: return THREAD_QOS_BACKGROUND; - case QOS_CLASS_MAINTENANCE: return THREAD_QOS_MAINTENANCE; - default: return THREAD_QOS_UNSPECIFIED; - } -} - -static inline pthread_priority_t -_pthread_priority_make_newest(qos_class_t qc, int rel, unsigned long flags) -{ - pthread_priority_t cls; - switch (qc) { - case QOS_CLASS_USER_INTERACTIVE: cls = __PTHREAD_PRIORITY_CBIT_USER_INTERACTIVE; break; - case QOS_CLASS_USER_INITIATED: cls = __PTHREAD_PRIORITY_CBIT_USER_INITIATED; break; - case QOS_CLASS_DEFAULT: cls = __PTHREAD_PRIORITY_CBIT_DEFAULT; break; - case QOS_CLASS_UTILITY: cls = __PTHREAD_PRIORITY_CBIT_UTILITY; break; - case QOS_CLASS_BACKGROUND: cls = __PTHREAD_PRIORITY_CBIT_BACKGROUND; break; - case QOS_CLASS_MAINTENANCE: cls = __PTHREAD_PRIORITY_CBIT_MAINTENANCE; break; - case QOS_CLASS_UNSPECIFIED: - default: - cls = __PTHREAD_PRIORITY_CBIT_UNSPECIFIED; - rel = 1; // results in priority bits == 0 - break; - } - - pthread_priority_t p = - (flags & _PTHREAD_PRIORITY_FLAGS_MASK) | - ((cls << _PTHREAD_PRIORITY_QOS_CLASS_SHIFT) & _PTHREAD_PRIORITY_QOS_CLASS_MASK) | - (((uint8_t)rel - 1) & _PTHREAD_PRIORITY_PRIORITY_MASK); - - return p; -} - -static inline qos_class_t -_pthread_priority_get_qos_newest(pthread_priority_t priority) -{ - qos_class_t qc; - switch ((priority & _PTHREAD_PRIORITY_QOS_CLASS_MASK) >> _PTHREAD_PRIORITY_QOS_CLASS_SHIFT) { - case __PTHREAD_PRIORITY_CBIT_USER_INTERACTIVE: qc = QOS_CLASS_USER_INTERACTIVE; break; - case __PTHREAD_PRIORITY_CBIT_USER_INITIATED: qc = QOS_CLASS_USER_INITIATED; break; - case __PTHREAD_PRIORITY_CBIT_DEFAULT: qc = QOS_CLASS_DEFAULT; break; - case __PTHREAD_PRIORITY_CBIT_UTILITY: qc = QOS_CLASS_UTILITY; break; - case __PTHREAD_PRIORITY_CBIT_BACKGROUND: qc = QOS_CLASS_BACKGROUND; break; - case __PTHREAD_PRIORITY_CBIT_MAINTENANCE: qc = QOS_CLASS_MAINTENANCE; break; - case __PTHREAD_PRIORITY_CBIT_UNSPECIFIED: - default: qc = QOS_CLASS_UNSPECIFIED; break; - } - return qc; -} - -#define _pthread_priority_get_relpri(priority) \ - ((int8_t)((priority & _PTHREAD_PRIORITY_PRIORITY_MASK) >> _PTHREAD_PRIORITY_PRIORITY_SHIFT) + 1) - -#define _pthread_priority_get_flags(priority) \ - (priority & _PTHREAD_PRIORITY_FLAGS_MASK) - -#define _pthread_priority_split_newest(priority, qos, relpri) \ - ({ qos = _pthread_priority_get_qos_newest(priority); \ - relpri = (qos == QOS_CLASS_UNSPECIFIED) ? 0 : \ - _pthread_priority_get_relpri(priority); \ - }) - -#define _PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL 0x1 -#define _PTHREAD_QOS_PARALLELISM_REALTIME 0x2 - /* userspace <-> kernel registration struct, for passing data to/from the kext during main thread init. 
*/ struct _pthread_registration_data { /* @@ -177,9 +87,16 @@ struct _pthread_registration_data { uint32_t tsd_offset; /* copy-in */ uint32_t return_to_kernel_offset; /* copy-in */ uint32_t mach_thread_self_offset; /* copy-in */ + mach_vm_address_t stack_addr_hint; /* copy-out */ uint32_t mutex_default_policy; /* copy-out */ } __attribute__ ((packed)); +/* + * "error" flags returned by fail condvar syscalls + */ +#define ECVCLEARED 0x100 +#define ECVPREPOST 0x200 + #ifdef KERNEL /* The set of features, from the feature bits above, that we support. */ @@ -198,23 +115,16 @@ extern pthread_callbacks_t pthread_kern; struct ksyn_waitq_element { TAILQ_ENTRY(ksyn_waitq_element) kwe_list; /* link to other list members */ void * kwe_kwqqueue; /* queue blocked on */ - uint32_t kwe_state; /* state */ + thread_t kwe_thread; + uint16_t kwe_state; /* state */ + uint16_t kwe_flags; uint32_t kwe_lockseq; /* the sequence of the entry */ uint32_t kwe_count; /* upper bound on number of matches still pending */ uint32_t kwe_psynchretval; /* thread retval */ void *kwe_uth; /* uthread */ - uint64_t kwe_tid; /* tid of waiter */ }; typedef struct ksyn_waitq_element * ksyn_waitq_element_t; -pthread_priority_t thread_qos_get_pthread_priority(int qos) __attribute__((const)); -int thread_qos_get_class_index(int qos) __attribute__((const)); -int pthread_priority_get_thread_qos(pthread_priority_t priority) __attribute__((const)); -int pthread_priority_get_class_index(pthread_priority_t priority) __attribute__((const)); -pthread_priority_t class_index_get_pthread_priority(int index) __attribute__((const)); -int class_index_get_thread_qos(int index) __attribute__((const)); -int qos_class_get_class_index(int qos) __attribute__((const)); - #define PTH_DEFAULT_STACKSIZE 512*1024 #define MAX_PTHREAD_SIZE 64*1024 @@ -276,29 +186,24 @@ extern thread_call_t psynch_thcall; struct uthread* current_uthread(void); -#define WORKQ_REQTHREADS_THREADREQ 0x1 -#define WORKQ_REQTHREADS_NOEMERGENCY 0x2 - -// Call for the kernel's kevent system to request threads. A list of QoS/event -// counts should be provided, sorted by flags and then QoS class. If the -// identity of the thread to handle the request is known, it will be returned. -// If a new thread must be created, NULL will be returned. 
-thread_t _workq_reqthreads(struct proc *p, int requests_count, - workq_reqthreads_req_t requests); +int +workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr); -// Resolve a pthread_priority_t to a QoS/relative pri -integer_t _thread_qos_from_pthread_priority(unsigned long pri, unsigned long *flags); -// Clear out extraneous flags/pri info for putting in voucher -pthread_priority_t _pthread_priority_canonicalize(pthread_priority_t pri, boolean_t for_propagation); +int +workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr); -boolean_t _workq_thread_has_been_unbound(thread_t th, int qos_class); +void +workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr, + mach_port_name_t kport, int th_qos, int setup_flags, int upcall_flags); -int workq_kern_threadreq(struct proc *p, workq_threadreq_t req, - enum workq_threadreq_type, unsigned long priority, int flags); +int +workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map, + user_addr_t stackaddr, mach_port_name_t kport, + user_addr_t events, int nevents, int upcall_flags); -int workq_kern_threadreq_modify(struct proc *p, workq_threadreq_t req, - enum workq_threadreq_op operation, - unsigned long arg1, unsigned long arg2); +void +workq_markfree_threadstack(proc_t p, thread_t th, vm_map_t vmap, + user_addr_t stackaddr); #endif // KERNEL diff --git a/kern/kern_policy.c b/kern/kern_policy.c deleted file mode 100644 index 98e0c61..0000000 --- a/kern/kern_policy.c +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright (c) 2013 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include "kern_internal.h" -#include -#include - -pthread_priority_t -thread_qos_get_pthread_priority(int qos) -{ - /* Map the buckets we have in pthread_priority_t into a QoS tier. 
*/ - switch (qos) { - case THREAD_QOS_USER_INTERACTIVE: return _pthread_priority_make_newest(QOS_CLASS_USER_INTERACTIVE, 0, 0); - case THREAD_QOS_USER_INITIATED: return _pthread_priority_make_newest(QOS_CLASS_USER_INITIATED, 0, 0); - case THREAD_QOS_LEGACY: return _pthread_priority_make_newest(QOS_CLASS_DEFAULT, 0, 0); - case THREAD_QOS_UTILITY: return _pthread_priority_make_newest(QOS_CLASS_UTILITY, 0, 0); - case THREAD_QOS_BACKGROUND: return _pthread_priority_make_newest(QOS_CLASS_BACKGROUND, 0, 0); - case THREAD_QOS_MAINTENANCE: return _pthread_priority_make_newest(QOS_CLASS_MAINTENANCE, 0, 0); - default: return _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); - } -} - -int -thread_qos_get_class_index(int qos) -{ - switch (qos) { - case THREAD_QOS_USER_INTERACTIVE: return 0; - case THREAD_QOS_USER_INITIATED: return 1; - case THREAD_QOS_LEGACY: return 2; - case THREAD_QOS_UTILITY: return 3; - case THREAD_QOS_BACKGROUND: return 4; - case THREAD_QOS_MAINTENANCE: return 5; - default: return 2; - } -} - -int -pthread_priority_get_thread_qos(pthread_priority_t priority) -{ - /* Map the buckets we have in pthread_priority_t into a QoS tier. */ - switch (_pthread_priority_get_qos_newest(priority)) { - case QOS_CLASS_USER_INTERACTIVE: return THREAD_QOS_USER_INTERACTIVE; - case QOS_CLASS_USER_INITIATED: return THREAD_QOS_USER_INITIATED; - case QOS_CLASS_DEFAULT: return THREAD_QOS_LEGACY; - case QOS_CLASS_UTILITY: return THREAD_QOS_UTILITY; - case QOS_CLASS_BACKGROUND: return THREAD_QOS_BACKGROUND; - case QOS_CLASS_MAINTENANCE: return THREAD_QOS_MAINTENANCE; - default: return THREAD_QOS_UNSPECIFIED; - } -} - -int -pthread_priority_get_class_index(pthread_priority_t priority) -{ - return qos_class_get_class_index(_pthread_priority_get_qos_newest(priority)); -} - -pthread_priority_t -class_index_get_pthread_priority(int index) -{ - qos_class_t qos; - switch (index) { - case 0: qos = QOS_CLASS_USER_INTERACTIVE; break; - case 1: qos = QOS_CLASS_USER_INITIATED; break; - case 2: qos = QOS_CLASS_DEFAULT; break; - case 3: qos = QOS_CLASS_UTILITY; break; - case 4: qos = QOS_CLASS_BACKGROUND; break; - case 5: qos = QOS_CLASS_MAINTENANCE; break; - case 6: assert(index != 6); // EVENT_MANAGER should be handled specially - default: - /* Return the utility band if we don't understand the input. */ - qos = QOS_CLASS_UTILITY; - } - - pthread_priority_t priority; - priority = _pthread_priority_make_newest(qos, 0, 0); - - return priority; -} - -int -class_index_get_thread_qos(int class) -{ - int thread_qos; - switch (class) { - case 0: thread_qos = THREAD_QOS_USER_INTERACTIVE; break; - case 1: thread_qos = THREAD_QOS_USER_INITIATED; break; - case 2: thread_qos = THREAD_QOS_LEGACY; break; - case 3: thread_qos = THREAD_QOS_UTILITY; break; - case 4: thread_qos = THREAD_QOS_BACKGROUND; break; - case 5: thread_qos = THREAD_QOS_MAINTENANCE; break; - case 6: thread_qos = THREAD_QOS_LAST; break; - default: - thread_qos = THREAD_QOS_LAST; - } - return thread_qos; -} - -int -qos_class_get_class_index(int qos) -{ - switch (qos){ - case QOS_CLASS_USER_INTERACTIVE: return 0; - case QOS_CLASS_USER_INITIATED: return 1; - case QOS_CLASS_DEFAULT: return 2; - case QOS_CLASS_UTILITY: return 3; - case QOS_CLASS_BACKGROUND: return 4; - case QOS_CLASS_MAINTENANCE: return 5; - default: - /* Return the default band if we don't understand the input. 
*/ - return 2; - } -} - -/** - * Shims to help the kernel understand pthread_priority_t - */ - -integer_t -_thread_qos_from_pthread_priority(unsigned long priority, unsigned long *flags) -{ - if (flags != NULL){ - *flags = (int)_pthread_priority_get_flags(priority); - } - int thread_qos = pthread_priority_get_thread_qos(priority); - if (thread_qos == THREAD_QOS_UNSPECIFIED && flags != NULL){ - *flags |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - } - return thread_qos; -} - -pthread_priority_t -_pthread_priority_canonicalize(pthread_priority_t priority, boolean_t for_propagation) -{ - qos_class_t qos_class; - int relpri; - unsigned long flags = _pthread_priority_get_flags(priority); - _pthread_priority_split_newest(priority, qos_class, relpri); - - if (for_propagation) { - flags = 0; - if (relpri > 0 || relpri < -15) relpri = 0; - } else { - if (qos_class == QOS_CLASS_UNSPECIFIED) { - flags = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - } else if (flags & (_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG|_PTHREAD_PRIORITY_SCHED_PRI_FLAG)){ - flags = _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - qos_class = QOS_CLASS_UNSPECIFIED; - } else { - flags &= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG|_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - } - - relpri = 0; - } - - return _pthread_priority_make_newest(qos_class, relpri, flags); -} diff --git a/kern/kern_support.c b/kern/kern_support.c index 280a18b..e424cce 100644 --- a/kern/kern_support.c +++ b/kern/kern_support.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2017 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,8 +32,8 @@ #pragma mark - Front Matter -#define _PTHREAD_CONDATTR_T -#define _PTHREAD_COND_T +#define _PTHREAD_CONDATTR_T +#define _PTHREAD_COND_T #define _PTHREAD_MUTEXATTR_T #define _PTHREAD_MUTEX_T #define _PTHREAD_RWLOCKATTR_T @@ -105,11 +105,11 @@ extern void panic(const char *string, ...) 
__printflike(1,2) __dead2; #include #include -#include #include "kern_internal.h" -// XXX: Dirty import for sys/signarvar.h that's wrapped in BSD_KERNEL_PRIVATE -#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP)) +#ifndef WQ_SETUP_EXIT_THREAD +#define WQ_SETUP_EXIT_THREAD 8 +#endif // XXX: Ditto for thread tags from kern/thread.h #define THREAD_TAG_MAINTHREAD 0x1 @@ -120,53 +120,13 @@ lck_grp_attr_t *pthread_lck_grp_attr; lck_grp_t *pthread_lck_grp; lck_attr_t *pthread_lck_attr; -zone_t pthread_zone_workqueue; -zone_t pthread_zone_threadlist; -zone_t pthread_zone_threadreq; - -extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64); -extern void workqueue_thread_yielded(void); - -#define WQ_SETUP_FIRST_USE 1 -#define WQ_SETUP_CLEAR_VOUCHER 2 -static void _setup_wqthread(proc_t p, thread_t th, struct workqueue *wq, - struct threadlist *tl, int flags); - -static void reset_priority(struct threadlist *tl, pthread_priority_t pri); -static pthread_priority_t pthread_priority_from_wq_class_index(struct workqueue *wq, int index); - -static void wq_unpark_continue(void* ptr, wait_result_t wait_result) __dead2; - -static bool workqueue_addnewthread(proc_t p, struct workqueue *wq); -static void workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use); -static void workqueue_lock_spin(struct workqueue *); -static void workqueue_unlock(struct workqueue *); - -#define WQ_RUN_TR_THROTTLED 0 -#define WQ_RUN_TR_THREAD_NEEDED 1 -#define WQ_RUN_TR_THREAD_STARTED 2 -#define WQ_RUN_TR_EXITING 3 -static int workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq, - struct threadlist *tl, struct threadreq *req, bool may_add_new_thread); - -static bool may_start_constrained_thread(struct workqueue *wq, - uint32_t at_priclass, struct threadlist *tl, bool may_start_timer); - -static mach_vm_offset_t stack_addr_hint(proc_t p, vm_map_t vmap); -static boolean_t wq_thread_is_busy(uint64_t cur_ts, - _Atomic uint64_t *lastblocked_tsp); - -int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc); -int proc_setalltargetconc(pid_t pid, int32_t * targetconcp); - -#define WQ_MAXPRI_MIN 0 /* low prio queue num */ -#define WQ_MAXPRI_MAX 2 /* max prio queuenum */ -#define WQ_PRI_NUM 3 /* number of prio work queues */ - #define C_32_STK_ALIGN 16 #define C_64_STK_ALIGN 16 #define C_64_REDZONE_LEN 128 +// WORKQ use the largest alignment any platform needs +#define C_WORKQ_STK_ALIGN 16 + #define PTHREAD_T_OFFSET 0 /* @@ -177,11 +137,12 @@ _________________________________________ ----------------------------------------- */ -#define PTHREAD_START_CUSTOM 0x01000000 +#define PTHREAD_START_CUSTOM 0x01000000 // #define PTHREAD_START_SETSCHED 0x02000000 -#define PTHREAD_START_DETACHED 0x04000000 +// was PTHREAD_START_DETACHED 0x04000000 #define PTHREAD_START_QOSCLASS 0x08000000 #define PTHREAD_START_TSD_BASE_SET 0x10000000 +#define PTHREAD_START_SUSPENDED 0x20000000 #define PTHREAD_START_QOSCLASS_MASK 0x00ffffff #define PTHREAD_START_POLICY_BITSHIFT 16 #define PTHREAD_START_POLICY_MASK 0xff @@ -193,199 +154,13 @@ _________________________________________ #define BASEPRI_DEFAULT 31 -#pragma mark sysctls - -static uint32_t wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS; -static uint32_t wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS; -static uint32_t wq_max_timer_interval_usecs = WQ_MAX_TIMER_INTERVAL_USECS; -static uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS; -static uint32_t wq_max_constrained_threads = WORKQUEUE_MAXTHREADS / 8; -static uint32_t 
wq_max_concurrency[WORKQUEUE_NUM_BUCKETS + 1]; // set to ncpus on load - -SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_stalled_window_usecs, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_reduce_pool_window_usecs, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_max_timer_interval_usecs, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_max_threads, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_max_constrained_threads, 0, ""); - -#ifdef DEBUG -static int wq_kevent_test SYSCTL_HANDLER_ARGS; -SYSCTL_PROC(_debug, OID_AUTO, wq_kevent_test, CTLFLAG_MASKED | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLTYPE_OPAQUE, NULL, 0, wq_kevent_test, 0, "-"); -#endif - -static uint32_t wq_init_constrained_limit = 1; - uint32_t pthread_debug_tracing = 1; -SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED, - &pthread_debug_tracing, 0, "") - static uint32_t pthread_mutex_default_policy; SYSCTL_INT(_kern, OID_AUTO, pthread_mutex_default_policy, CTLFLAG_RW | CTLFLAG_LOCKED, &pthread_mutex_default_policy, 0, ""); -/* - * +-----+-----+-----+-----+-----+-----+-----+ - * | MT | BG | UT | DE | IN | UN | mgr | - * +-----+-----+-----+-----+-----+-----+-----+-----+ - * | pri | 5 | 4 | 3 | 2 | 1 | 0 | 6 | - * | qos | 1 | 2 | 3 | 4 | 5 | 6 | 7 | - * +-----+-----+-----+-----+-----+-----+-----+-----+ - */ -static inline uint32_t -_wq_bucket_to_thread_qos(int pri) -{ - if (pri == WORKQUEUE_EVENT_MANAGER_BUCKET) { - return WORKQUEUE_EVENT_MANAGER_BUCKET + 1; - } - return WORKQUEUE_EVENT_MANAGER_BUCKET - pri; -} - -#pragma mark wq_thactive - -#if defined(__LP64__) -// Layout is: -// 7 * 16 bits for each QoS bucket request count (including manager) -// 3 bits of best QoS among all pending constrained requests -// 13 bits of zeroes -#define WQ_THACTIVE_BUCKET_WIDTH 16 -#define WQ_THACTIVE_QOS_SHIFT (7 * WQ_THACTIVE_BUCKET_WIDTH) -#else -// Layout is: -// 6 * 10 bits for each QoS bucket request count (except manager) -// 1 bit for the manager bucket -// 3 bits of best QoS among all pending constrained requests -#define WQ_THACTIVE_BUCKET_WIDTH 10 -#define WQ_THACTIVE_QOS_SHIFT (6 * WQ_THACTIVE_BUCKET_WIDTH + 1) -#endif -#define WQ_THACTIVE_BUCKET_MASK ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1) -#define WQ_THACTIVE_BUCKET_HALF (1U << (WQ_THACTIVE_BUCKET_WIDTH - 1)) -#define WQ_THACTIVE_NO_PENDING_REQUEST 6 - -_Static_assert(sizeof(wq_thactive_t) * CHAR_BIT - WQ_THACTIVE_QOS_SHIFT >= 3, - "Make sure we have space to encode a QoS"); - -static inline wq_thactive_t -_wq_thactive_fetch_and_add(struct workqueue *wq, wq_thactive_t offset) -{ -#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__) - return atomic_fetch_add_explicit(&wq->wq_thactive, offset, - memory_order_relaxed); -#else - return pthread_kern->atomic_fetch_add_128_relaxed(&wq->wq_thactive, offset); -#endif -} - -static inline wq_thactive_t -_wq_thactive(struct workqueue *wq) -{ -#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__) - return atomic_load_explicit(&wq->wq_thactive, memory_order_relaxed); -#else - return pthread_kern->atomic_load_128_relaxed(&wq->wq_thactive); -#endif -} - -#define WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(tha) \ - ((tha) >> WQ_THACTIVE_QOS_SHIFT) - -static inline uint32_t -_wq_thactive_best_constrained_req_qos(struct workqueue *wq) -{ - // Avoid expensive atomic 
operations: the three bits we're loading are in - // a single byte, and always updated under the workqueue lock - wq_thactive_t v = *(wq_thactive_t *)&wq->wq_thactive; - return WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(v); -} - -static inline wq_thactive_t -_wq_thactive_set_best_constrained_req_qos(struct workqueue *wq, - uint32_t orig_qos, uint32_t new_qos) -{ - wq_thactive_t v; - v = (wq_thactive_t)(new_qos - orig_qos) << WQ_THACTIVE_QOS_SHIFT; - /* - * We can do an atomic add relative to the initial load because updates - * to this qos are always serialized under the workqueue lock. - */ - return _wq_thactive_fetch_and_add(wq, v) + v; -} - -static inline wq_thactive_t -_wq_thactive_offset_for_qos(int qos) -{ - return (wq_thactive_t)1 << (qos * WQ_THACTIVE_BUCKET_WIDTH); -} - -static inline wq_thactive_t -_wq_thactive_inc(struct workqueue *wq, int qos) -{ - return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(qos)); -} - -static inline wq_thactive_t -_wq_thactive_dec(struct workqueue *wq, int qos) -{ - return _wq_thactive_fetch_and_add(wq, -_wq_thactive_offset_for_qos(qos)); -} - -static inline wq_thactive_t -_wq_thactive_move(struct workqueue *wq, int oldqos, int newqos) -{ - return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(newqos) - - _wq_thactive_offset_for_qos(oldqos)); -} - -static inline uint32_t -_wq_thactive_aggregate_downto_qos(struct workqueue *wq, wq_thactive_t v, - int qos, uint32_t *busycount, uint32_t *max_busycount) -{ - uint32_t count = 0, active; - uint64_t curtime; - -#ifndef __LP64__ - /* - * on 32bits the manager bucket is a single bit and the best constrained - * request QoS 3 bits are where the 10 bits of a regular QoS bucket count - * would be. Mask them out. - */ - v &= ~(~0ull << WQ_THACTIVE_QOS_SHIFT); -#endif - if (busycount) { - curtime = mach_absolute_time(); - *busycount = 0; - } - if (max_busycount) { - *max_busycount = qos + 1; - } - for (int i = 0; i <= qos; i++, v >>= WQ_THACTIVE_BUCKET_WIDTH) { - active = v & WQ_THACTIVE_BUCKET_MASK; - count += active; - if (busycount && wq->wq_thscheduled_count[i] > active) { - if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i])) { - /* - * We only consider the last blocked thread for a given bucket - * as busy because we don't want to take the list lock in each - * sched callback. However this is an approximation that could - * contribute to thread creation storms. - */ - (*busycount)++; - } - } - } - return count; -} - #pragma mark - Process/Thread Setup/Teardown syscalls static mach_vm_offset_t @@ -445,41 +220,45 @@ stack_addr_hint(proc_t p, vm_map_t vmap) return stackaddr; } +static bool +_pthread_priority_to_policy(pthread_priority_t priority, + thread_qos_policy_data_t *data) +{ + data->qos_tier = _pthread_priority_thread_qos(priority); + data->tier_importance = _pthread_priority_relpri(priority); + if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 || + data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) { + return false; + } + return true; +} + /** * bsdthread_create system call. Used by pthread_create. 
*/ int -_bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval) +_bsdthread_create(struct proc *p, + __unused user_addr_t user_func, __unused user_addr_t user_funcarg, + user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, + user_addr_t *retval) { kern_return_t kret; void * sright; int error = 0; - int allocated = 0; - mach_vm_offset_t stackaddr; - mach_vm_size_t th_allocsize = 0; - mach_vm_size_t th_guardsize; - mach_vm_offset_t th_stack; - mach_vm_offset_t th_pthread; mach_vm_offset_t th_tsd_base; mach_port_name_t th_thport; thread_t th; - vm_map_t vmap = pthread_kern->current_map(); task_t ctask = current_task(); unsigned int policy, importance; uint32_t tsd_offset; - - int isLP64 = 0; + bool start_suspended = (flags & PTHREAD_START_SUSPENDED); if (pthread_kern->proc_get_register(p) == 0) { return EINVAL; } - PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0); - - isLP64 = proc_is64bit(p); - th_guardsize = vm_map_page_size(vmap); + PTHREAD_TRACE(pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0); - stackaddr = pthread_kern->proc_get_stack_addr_hint(p); kret = pthread_kern->thread_create(ctask, &th); if (kret != KERN_SUCCESS) return(ENOMEM); @@ -495,152 +274,64 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar } if ((flags & PTHREAD_START_CUSTOM) == 0) { - mach_vm_size_t pthread_size = - vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(vmap)); - th_allocsize = th_guardsize + user_stack + pthread_size; - user_stack += PTHREAD_T_OFFSET; - - kret = mach_vm_map(vmap, &stackaddr, - th_allocsize, - page_size-1, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL, - 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, - VM_INHERIT_DEFAULT); - if (kret != KERN_SUCCESS){ - kret = mach_vm_allocate(vmap, - &stackaddr, th_allocsize, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE); - } - if (kret != KERN_SUCCESS) { - error = ENOMEM; - goto out; - } - - PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0); - - allocated = 1; - /* - * The guard page is at the lowest address - * The stack base is the highest address - */ - kret = mach_vm_protect(vmap, stackaddr, th_guardsize, FALSE, VM_PROT_NONE); - - if (kret != KERN_SUCCESS) { - error = ENOMEM; - goto out1; - } - - th_pthread = stackaddr + th_guardsize + user_stack; - th_stack = th_pthread; - - /* - * Pre-fault the first page of the new thread's stack and the page that will - * contain the pthread_t structure. 
- */ - if (vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) != - vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap))){ - vm_fault( vmap, - vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); - } - - vm_fault( vmap, - vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap)), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); - - } else { - th_stack = user_stack; - th_pthread = user_pthread; - - PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0); + error = EINVAL; + goto out; } + PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3); + tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p); if (tsd_offset) { - th_tsd_base = th_pthread + tsd_offset; + th_tsd_base = user_pthread + tsd_offset; kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base); if (kret == KERN_SUCCESS) { flags |= PTHREAD_START_TSD_BASE_SET; } } + /* + * Strip PTHREAD_START_SUSPENDED so that libpthread can observe the kernel + * supports this flag (after the fact). + */ + flags &= ~PTHREAD_START_SUSPENDED; -#if defined(__i386__) || defined(__x86_64__) /* - * Set up i386 registers & function call. + * Set up registers & function call. */ - if (isLP64 == 0) { - x86_thread_state32_t state = { - .eip = (unsigned int)pthread_kern->proc_get_threadstart(p), - .eax = (unsigned int)th_pthread, - .ebx = (unsigned int)th_thport, - .ecx = (unsigned int)user_func, - .edx = (unsigned int)user_funcarg, - .edi = (unsigned int)user_stack, - .esi = (unsigned int)flags, - /* - * set stack pointer - */ - .esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN)) +#if defined(__i386__) || defined(__x86_64__) + if (proc_is64bit_data(p)) { + x86_thread_state64_t state = { + .rip = (uint64_t)pthread_kern->proc_get_threadstart(p), + .rdi = (uint64_t)user_pthread, + .rsi = (uint64_t)th_thport, + .rdx = (uint64_t)user_func, /* golang wants this */ + .rcx = (uint64_t)user_funcarg, /* golang wants this */ + .r8 = (uint64_t)user_stack, /* golang wants this */ + .r9 = (uint64_t)flags, + + .rsp = (uint64_t)(user_stack - C_64_REDZONE_LEN) }; - error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); - if (error != KERN_SUCCESS) { - error = EINVAL; - goto out; - } + (void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state); } else { - x86_thread_state64_t state64 = { - .rip = (uint64_t)pthread_kern->proc_get_threadstart(p), - .rdi = (uint64_t)th_pthread, - .rsi = (uint64_t)(th_thport), - .rdx = (uint64_t)user_func, - .rcx = (uint64_t)user_funcarg, - .r8 = (uint64_t)user_stack, - .r9 = (uint64_t)flags, - /* - * set stack pointer aligned to 16 byte boundary - */ - .rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN) + x86_thread_state32_t state = { + .eip = (uint32_t)pthread_kern->proc_get_threadstart(p), + .eax = (uint32_t)user_pthread, + .ebx = (uint32_t)th_thport, + .ecx = (uint32_t)user_func, /* golang wants this */ + .edx = (uint32_t)user_funcarg, /* golang wants this */ + .edi = (uint32_t)user_stack, /* golang wants this */ + .esi = (uint32_t)flags, + + .esp = (int)((vm_offset_t)(user_stack - C_32_STK_ALIGN)) }; - error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64); - if (error != KERN_SUCCESS) { - error = EINVAL; - goto out; - } - + (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); } -#elif defined(__arm__) - arm_thread_state_t state = { - .pc = 
(int)pthread_kern->proc_get_threadstart(p), - .r[0] = (unsigned int)th_pthread, - .r[1] = (unsigned int)th_thport, - .r[2] = (unsigned int)user_func, - .r[3] = (unsigned int)user_funcarg, - .r[4] = (unsigned int)user_stack, - .r[5] = (unsigned int)flags, - - /* Set r7 & lr to 0 for better back tracing */ - .r[7] = 0, - .lr = 0, - - /* - * set stack pointer - */ - .sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN)) - }; - - (void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); - #else #error bsdthread_create not defined for this architecture #endif - if ((flags & PTHREAD_START_SETSCHED) != 0) { + if (flags & PTHREAD_START_SETSCHED) { /* Set scheduling parameters if needed */ thread_extended_policy_data_t extinfo; thread_precedence_policy_data_t precedinfo; @@ -658,16 +349,16 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar precedinfo.importance = (importance - BASEPRI_DEFAULT); thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT); - } else if ((flags & PTHREAD_START_QOSCLASS) != 0) { + } else if (flags & PTHREAD_START_QOSCLASS) { /* Set thread QoS class if requested. */ - pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK); - thread_qos_policy_data_t qos; - qos.qos_tier = pthread_priority_get_thread_qos(priority); - qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : - _pthread_priority_get_relpri(priority); - pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT); + if (!_pthread_priority_to_policy(flags & PTHREAD_START_QOSCLASS_MASK, &qos)) { + error = EINVAL; + goto out; + } + pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, + (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT); } if (pthread_kern->proc_get_mach_thread_self_tsd_offset) { @@ -677,37 +368,33 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar bool proc64bit = proc_is64bit(p); if (proc64bit) { uint64_t th_thport_tsd = (uint64_t)th_thport; - error = copyout(&th_thport_tsd, th_pthread + tsd_offset + + error = copyout(&th_thport_tsd, user_pthread + tsd_offset + mach_thread_self_offset, sizeof(th_thport_tsd)); } else { uint32_t th_thport_tsd = (uint32_t)th_thport; - error = copyout(&th_thport_tsd, th_pthread + tsd_offset + + error = copyout(&th_thport_tsd, user_pthread + tsd_offset + mach_thread_self_offset, sizeof(th_thport_tsd)); } if (error) { - goto out1; + goto out; } } } - kret = pthread_kern->thread_resume(th); - if (kret != KERN_SUCCESS) { - error = EINVAL; - goto out1; + if (!start_suspended) { + kret = pthread_kern->thread_resume(th); + if (kret != KERN_SUCCESS) { + error = EINVAL; + goto out; + } } thread_deallocate(th); /* drop the creator reference */ - PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0); - - // cast required as mach_vm_offset_t is always 64 bits even on 32-bit platforms - *retval = (user_addr_t)th_pthread; + PTHREAD_TRACE(pthread_thread_create|DBG_FUNC_END, error, user_pthread, 0, 0); + *retval = user_pthread; return(0); -out1: - if (allocated != 0) { - (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize); - } out: (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport); if (pthread_kern->thread_will_park_or_terminate) { @@ -737,21 +424,24 @@ _bsdthread_terminate(__unused struct proc *p, freeaddr = (mach_vm_offset_t)stackaddr; freesize = size; - 
PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0); + PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff); if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) { if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){ vm_map_t user_map = pthread_kern->current_map(); freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map)); kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE); - assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS); +#if MACH_ASSERT + if (kret != KERN_SUCCESS && kret != KERN_INVALID_ADDRESS) { + os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kret); + } +#endif kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE); assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS); } else { kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize); if (kret != KERN_SUCCESS) { - PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0); - return(EINVAL); + PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0); } } } @@ -761,10 +451,9 @@ _bsdthread_terminate(__unused struct proc *p, } (void)thread_terminate(th); if (sem != MACH_PORT_NULL) { - kret = pthread_kern->semaphore_signal_internal_trap(sem); + kret = pthread_kern->semaphore_signal_internal_trap(sem); if (kret != KERN_SUCCESS) { - PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0); - return(EINVAL); + PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0); } } @@ -772,14 +461,10 @@ _bsdthread_terminate(__unused struct proc *p, pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport); } - PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0); + PTHREAD_TRACE(pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0); pthread_kern->thread_exception_return(); - panic("bsdthread_terminate: still running\n"); - - PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0); - - return(0); + __builtin_unreachable(); } /** @@ -873,29 +558,35 @@ _bsdthread_register(struct proc *p, if (pthread_init_data != 0) { /* Outgoing data that userspace expects as a reply */ data.version = sizeof(struct _pthread_registration_data); + data.main_qos = _pthread_unspecified_priority(); + if (pthread_kern->qos_main_thread_active()) { mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT; thread_qos_policy_data_t qos; boolean_t gd = FALSE; - kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd); + kr = pthread_kern->thread_policy_get(current_thread(), + THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd); if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) { - /* Unspecified threads means the kernel wants us to impose legacy upon the thread. */ + /* + * Unspecified threads means the kernel wants us + * to impose legacy upon the thread. 
+ */ qos.qos_tier = THREAD_QOS_LEGACY; qos.tier_importance = 0; - kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT); + kr = pthread_kern->thread_policy_set_internal(current_thread(), + THREAD_QOS_POLICY, (thread_policy_t)&qos, + THREAD_QOS_POLICY_COUNT); } if (kr == KERN_SUCCESS) { - data.main_qos = thread_qos_get_pthread_priority(qos.qos_tier); - } else { - data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); + data.main_qos = _pthread_priority_make_from_thread_qos( + qos.qos_tier, 0, 0); } - } else { - data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); } + data.stack_addr_hint = stackaddr; data.mutex_default_policy = pthread_mutex_default_policy; kr = copyout(&data, pthread_init_data, pthread_init_sz); @@ -910,2858 +601,220 @@ _bsdthread_register(struct proc *p, return(0); } -#pragma mark - QoS Manipulation + +#pragma mark - Workqueue Thread Support + +static mach_vm_size_t +workq_thread_allocsize(proc_t p, vm_map_t wq_map, + mach_vm_size_t *guardsize_out) +{ + mach_vm_size_t guardsize = vm_map_page_size(wq_map); + mach_vm_size_t pthread_size = vm_map_round_page_mask( + pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, + vm_map_page_mask(wq_map)); + if (guardsize_out) *guardsize_out = guardsize; + return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size; +} int -_bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval) +workq_create_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t *out_addr) { - int rv; - thread_t th; + mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p); + mach_vm_size_t guardsize, th_allocsize; + kern_return_t kret; - pthread_priority_t priority; + th_allocsize = workq_thread_allocsize(p, vmap, &guardsize); + kret = mach_vm_map(vmap, &stackaddr, th_allocsize, page_size - 1, + VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL, 0, FALSE, + VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); - /* Unused parameters must be zero. */ - if (arg3 != 0) { - return EINVAL; + if (kret != KERN_SUCCESS) { + kret = mach_vm_allocate(vmap, &stackaddr, th_allocsize, + VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE); } - /* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */ - if (proc_is64bit(p)) { - uint64_t v; - rv = copyin(tsd_priority_addr, &v, sizeof(v)); - if (rv) goto out; - priority = (int)(v & 0xffffffff); - } else { - uint32_t v; - rv = copyin(tsd_priority_addr, &v, sizeof(v)); - if (rv) goto out; - priority = v; + if (kret != KERN_SUCCESS) { + goto fail; } - if ((th = port_name_to_thread(kport)) == THREAD_NULL) { - return ESRCH; + /* + * The guard page is at the lowest address + * The stack base is the highest address + */ + kret = mach_vm_protect(vmap, stackaddr, guardsize, FALSE, VM_PROT_NONE); + if (kret != KERN_SUCCESS) { + goto fail_vm_deallocate; } - /* Disable pthread_set_qos_class_np() on threads other than pthread_self */ - if (th != current_thread()) { - thread_deallocate(th); - return EPERM; + if (out_addr) { + *out_addr = stackaddr; } + return 0; - rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval); - - /* Static param the thread, we just set QoS on it, so its stuck in QoS land now. 
*/ - /* pthread_kern->thread_static_param(th, TRUE); */ // see , for details - - thread_deallocate(th); - -out: - return rv; +fail_vm_deallocate: + (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize); +fail: + return kret; } -static inline struct threadlist * -util_get_thread_threadlist_entry(thread_t th) +int +workq_destroy_threadstack(proc_t p, vm_map_t vmap, mach_vm_offset_t stackaddr) { - struct uthread *uth = pthread_kern->get_bsdthread_info(th); - if (uth) { - struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); - return tl; - } - return NULL; + return mach_vm_deallocate(vmap, stackaddr, + workq_thread_allocsize(p, vmap, NULL)); } -boolean_t -_workq_thread_has_been_unbound(thread_t th, int qos_class) -{ - struct threadlist *tl = util_get_thread_threadlist_entry(th); - if (!tl) { - return FALSE; +void +workq_markfree_threadstack(proc_t OS_UNUSED p, thread_t OS_UNUSED th, + vm_map_t vmap, user_addr_t stackaddr) +{ + // Keep this in sync with workq_setup_thread() + const vm_size_t guardsize = vm_map_page_size(vmap); + const user_addr_t freeaddr = (user_addr_t)stackaddr + guardsize; + const vm_map_offset_t freesize = vm_map_trunc_page_mask( + (PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1, + vm_map_page_mask(vmap)) - guardsize; + + __assert_only kern_return_t kr = mach_vm_behavior_set(vmap, freeaddr, + freesize, VM_BEHAVIOR_REUSABLE); +#if MACH_ASSERT + if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) { + os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr); } +#endif +} - struct workqueue *wq = tl->th_workq; - workqueue_lock_spin(wq); - - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { - goto failure; - } else if (qos_class != class_index_get_thread_qos(tl->th_priority)) { - goto failure; - } +struct workq_thread_addrs { + user_addr_t self; + user_addr_t stack_bottom; + user_addr_t stack_top; +}; - if ((tl->th_flags & TH_LIST_KEVENT_BOUND)){ - goto failure; - } - tl->th_flags &= ~TH_LIST_KEVENT_BOUND; +static inline void +workq_thread_set_top_addr(struct workq_thread_addrs *th_addrs, user_addr_t addr) +{ + th_addrs->stack_top = (addr & -C_WORKQ_STK_ALIGN); +} - workqueue_unlock(wq); - return TRUE; +static void +workq_thread_get_addrs(vm_map_t map, user_addr_t stackaddr, + struct workq_thread_addrs *th_addrs) +{ + const vm_size_t guardsize = vm_map_page_size(map); -failure: - workqueue_unlock(wq); - return FALSE; + th_addrs->self = (user_addr_t)(stackaddr + PTH_DEFAULT_STACKSIZE + + guardsize + PTHREAD_T_OFFSET); + workq_thread_set_top_addr(th_addrs, th_addrs->self); + th_addrs->stack_bottom = (user_addr_t)(stackaddr + guardsize); } -int -_bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval) +static inline void +workq_set_register_state(proc_t p, thread_t th, + struct workq_thread_addrs *addrs, mach_port_name_t kport, + user_addr_t kevent_list, uint32_t upcall_flags, int kevent_count) { - thread_qos_policy_data_t qos; - mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT; - boolean_t gd = FALSE; - thread_t th = current_thread(); - struct workqueue *wq = NULL; - struct threadlist *tl = NULL; + user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p); + if (!wqstart_fnptr) { + panic("workqueue thread start function pointer is NULL"); + } - kern_return_t kr; - int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0; +#if defined(__i386__) || defined(__x86_64__) + if (proc_is64bit_data(p) == 0) { + 
x86_thread_state32_t state = { + .eip = (unsigned int)wqstart_fnptr, + .eax = /* arg0 */ (unsigned int)addrs->self, + .ebx = /* arg1 */ (unsigned int)kport, + .ecx = /* arg2 */ (unsigned int)addrs->stack_bottom, + .edx = /* arg3 */ (unsigned int)kevent_list, + .edi = /* arg4 */ (unsigned int)upcall_flags, + .esi = /* arg5 */ (unsigned int)kevent_count, - if ((flags & _PTHREAD_SET_SELF_WQ_KEVENT_UNBIND) != 0) { - tl = util_get_thread_threadlist_entry(th); - if (tl) { - wq = tl->th_workq; - } else { - goto qos; + .esp = (int)((vm_offset_t)addrs->stack_top), + }; + + int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); + if (error != KERN_SUCCESS) { + panic(__func__ ": thread_set_wq_state failed: %d", error); } + } else { + x86_thread_state64_t state64 = { + // x86-64 already passes all the arguments in registers, so we just put them in their final place here + .rip = (uint64_t)wqstart_fnptr, + .rdi = (uint64_t)addrs->self, + .rsi = (uint64_t)kport, + .rdx = (uint64_t)addrs->stack_bottom, + .rcx = (uint64_t)kevent_list, + .r8 = (uint64_t)upcall_flags, + .r9 = (uint64_t)kevent_count, - workqueue_lock_spin(wq); - if (tl->th_flags & TH_LIST_KEVENT_BOUND) { - tl->th_flags &= ~TH_LIST_KEVENT_BOUND; - unsigned int kevent_flags = KEVENT_FLAG_WORKQ | KEVENT_FLAG_UNBIND_CHECK_FLAGS; - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { - kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER; - } + .rsp = (uint64_t)(addrs->stack_top) + }; - workqueue_unlock(wq); - __assert_only int ret = kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags); - assert(ret == 0); - } else { - workqueue_unlock(wq); + int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64); + if (error != KERN_SUCCESS) { + panic(__func__ ": thread_set_wq_state failed: %d", error); } } +#else +#error setup_wqthread not defined for this architecture +#endif +} -qos: - if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) { - kr = pthread_kern->thread_policy_get(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd); - if (kr != KERN_SUCCESS) { - qos_rv = EINVAL; - goto voucher; - } +static int +workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs, int upcall_flags, + user_addr_t eventlist, int nevents, int kevent_flags, + user_addr_t *kevent_list_out, int *kevent_count_out) +{ + bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP; + int kevent_count = WQ_KEVENT_LIST_LEN; + user_addr_t kevent_list = th_addrs->self - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s); + user_addr_t kevent_id_addr = kevent_list; + kqueue_id_t kevent_id = -1; + int ret; + if (workloop) { /* - * If we have main-thread QoS then we don't allow a thread to come out - * of QOS_CLASS_UNSPECIFIED. + * The kevent ID goes just below the kevent list. Sufficiently new + * userspace will know to look there. Old userspace will just + * ignore it. 
*/ - if (pthread_kern->qos_main_thread_active() && qos.qos_tier == - THREAD_QOS_UNSPECIFIED) { - qos_rv = EPERM; - goto voucher; - } + kevent_id_addr -= sizeof(kqueue_id_t); + } - if (!tl) { - tl = util_get_thread_threadlist_entry(th); - if (tl) wq = tl->th_workq; - } + user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE; + user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE; - PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0); + if (workloop) { + kevent_flags |= KEVENT_FLAG_WORKLOOP; + ret = kevent_id_internal(p, &kevent_id, + eventlist, nevents, kevent_list, kevent_count, + kevent_data_buf, &kevent_data_available, + kevent_flags, &kevent_count); + copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id)); + } else { + kevent_flags |= KEVENT_FLAG_WORKQ; + ret = kevent_qos_internal(p, -1, eventlist, nevents, kevent_list, + kevent_count, kevent_data_buf, &kevent_data_available, + kevent_flags, &kevent_count); + } - qos.qos_tier = pthread_priority_get_thread_qos(priority); - qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority); + // squash any errors into just empty output + if (ret != 0 || kevent_count == -1) { + *kevent_list_out = NULL; + *kevent_count_out = 0; + return ret; + } - if (qos.qos_tier == QOS_CLASS_UNSPECIFIED || - qos.tier_importance > 0 || qos.tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) { - qos_rv = EINVAL; - goto voucher; - } - - /* - * If we're a workqueue, the threadlist item priority needs adjusting, - * along with the bucket we were running in. - */ - if (tl) { - bool try_run_threadreq = false; - - workqueue_lock_spin(wq); - kr = pthread_kern->thread_set_workq_qos(th, qos.qos_tier, qos.tier_importance); - assert(kr == KERN_SUCCESS || kr == KERN_TERMINATED); - - /* Fix up counters. */ - uint8_t old_bucket = tl->th_priority; - uint8_t new_bucket = pthread_priority_get_class_index(priority); - - if (old_bucket != new_bucket) { - _wq_thactive_move(wq, old_bucket, new_bucket); - wq->wq_thscheduled_count[old_bucket]--; - wq->wq_thscheduled_count[new_bucket]++; - if (old_bucket == WORKQUEUE_EVENT_MANAGER_BUCKET || - old_bucket < new_bucket) { - /* - * if the QoS of the thread was lowered, then this could - * allow for a higher QoS thread request to run, so we need - * to reevaluate. 
- */ - try_run_threadreq = true; - } - tl->th_priority = new_bucket; - } - - bool old_overcommit = !(tl->th_flags & TH_LIST_CONSTRAINED); - bool new_overcommit = priority & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; - if (!old_overcommit && new_overcommit) { - if (wq->wq_constrained_threads_scheduled-- == - wq_max_constrained_threads) { - try_run_threadreq = true; - } - tl->th_flags &= ~TH_LIST_CONSTRAINED; - } else if (old_overcommit && !new_overcommit) { - wq->wq_constrained_threads_scheduled++; - tl->th_flags |= TH_LIST_CONSTRAINED; - } - - if (try_run_threadreq) { - workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true); - } else { - workqueue_unlock(wq); - } - } else { - kr = pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT); - if (kr != KERN_SUCCESS) { - qos_rv = EINVAL; - } - } - - PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0); - } - -voucher: - if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) { - kr = pthread_kern->thread_set_voucher_name(voucher); - if (kr != KERN_SUCCESS) { - voucher_rv = ENOENT; - goto fixedpri; - } - } - -fixedpri: - if (qos_rv) goto done; - if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) { - thread_extended_policy_data_t extpol = {.timeshare = 0}; - - if (!tl) tl = util_get_thread_threadlist_entry(th); - if (tl) { - /* Not allowed on workqueue threads */ - fixedpri_rv = ENOTSUP; - goto done; - } - - kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT); - if (kr != KERN_SUCCESS) { - fixedpri_rv = EINVAL; - goto done; - } - } else if ((flags & _PTHREAD_SET_SELF_TIMESHARE_FLAG) != 0) { - thread_extended_policy_data_t extpol = {.timeshare = 1}; - - if (!tl) tl = util_get_thread_threadlist_entry(th); - if (tl) { - /* Not allowed on workqueue threads */ - fixedpri_rv = ENOTSUP; - goto done; - } - - kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT); - if (kr != KERN_SUCCESS) { - fixedpri_rv = EINVAL; - goto done; - } - } - -done: - if (qos_rv && voucher_rv) { - /* Both failed, give that a unique error. */ - return EBADMSG; - } - - if (qos_rv) { - return qos_rv; - } - - if (voucher_rv) { - return voucher_rv; - } - - if (fixedpri_rv) { - return fixedpri_rv; - } - - return 0; -} - -int -_bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval) -{ - thread_t th; - int rv = 0; - - if ((th = port_name_to_thread(kport)) == THREAD_NULL) { - return ESRCH; - } - - int override_qos = pthread_priority_get_thread_qos(priority); - - struct threadlist *tl = util_get_thread_threadlist_entry(th); - if (tl) { - PTHREAD_TRACE_WQ(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0); - } - - /* The only failure case here is if we pass a tid and have it lookup the thread, we pass the uthread, so this all always succeeds. 
*/ - pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE, - resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE, USER_ADDR_NULL, MACH_PORT_NULL); - thread_deallocate(th); - return rv; -} - -int -_bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t resource, user_addr_t arg3, int __unused *retval) -{ - thread_t th; - int rv = 0; - - if (arg3 != 0) { - return EINVAL; - } - - if ((th = port_name_to_thread(kport)) == THREAD_NULL) { - return ESRCH; - } - - struct uthread *uth = pthread_kern->get_bsdthread_info(th); - - struct threadlist *tl = util_get_thread_threadlist_entry(th); - if (tl) { - PTHREAD_TRACE_WQ(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 0, 0, 0); - } - - pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE); - - thread_deallocate(th); - return rv; -} - -static int -_bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, user_addr_t ulock_addr) -{ - thread_t th; - int rv = 0; - - if ((th = port_name_to_thread(kport)) == THREAD_NULL) { - return ESRCH; - } - - int override_qos = pthread_priority_get_thread_qos(priority); - - struct threadlist *tl = util_get_thread_threadlist_entry(th); - if (!tl) { - thread_deallocate(th); - return EPERM; - } - - PTHREAD_TRACE_WQ(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0); - - rv = pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE, - resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE, ulock_addr, kport); - - thread_deallocate(th); - return rv; -} - -int _bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused *p, user_addr_t __unused cmd, - mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval) -{ - return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, resource, USER_ADDR_NULL); -} - -int -_bsdthread_ctl_qos_override_dispatch(struct proc *p __unused, user_addr_t cmd __unused, mach_port_name_t kport, pthread_priority_t priority, user_addr_t ulock_addr, int __unused *retval) -{ - return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, USER_ADDR_NULL, ulock_addr); -} - -int -_bsdthread_ctl_qos_override_reset(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval) -{ - if (arg1 != 0 || arg2 != 0 || arg3 != 0) { - return EINVAL; - } - - return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, 1 /* reset_all */, 0, 0, retval); -} - -int -_bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused *p, user_addr_t __unused cmd, int reset_all, user_addr_t resource, user_addr_t arg3, int __unused *retval) -{ - if ((reset_all && (resource != 0)) || arg3 != 0) { - return EINVAL; - } - - thread_t th = current_thread(); - struct uthread *uth = pthread_kern->get_bsdthread_info(th); - struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); - - if (!tl) { - return EPERM; - } - - PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, 0, 0, 0, 0); - - resource = reset_all ? 
THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD : resource; - pthread_kern->proc_usynch_thread_qos_reset_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE); - - return 0; -} - -static int -_bsdthread_ctl_max_parallelism(struct proc __unused *p, user_addr_t __unused cmd, - int qos, unsigned long flags, int *retval) -{ - _Static_assert(QOS_PARALLELISM_COUNT_LOGICAL == - _PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL, "logical"); - _Static_assert(QOS_PARALLELISM_REALTIME == - _PTHREAD_QOS_PARALLELISM_REALTIME, "realtime"); - - if (flags & ~(QOS_PARALLELISM_REALTIME | QOS_PARALLELISM_COUNT_LOGICAL)) { - return EINVAL; - } - - if (flags & QOS_PARALLELISM_REALTIME) { - if (qos) { - return EINVAL; - } - } else if (qos == THREAD_QOS_UNSPECIFIED || qos >= THREAD_QOS_LAST) { - return EINVAL; - } - - *retval = pthread_kern->qos_max_parallelism(qos, flags); - return 0; -} - -int -_bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval) -{ - switch (cmd) { - case BSDTHREAD_CTL_SET_QOS: - return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval); - case BSDTHREAD_CTL_QOS_OVERRIDE_START: - return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval); - case BSDTHREAD_CTL_QOS_OVERRIDE_END: - return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval); - case BSDTHREAD_CTL_QOS_OVERRIDE_RESET: - return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval); - case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH: - return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval); - case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD: - return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval); - case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET: - return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, (int)arg1, arg2, arg3, retval); - case BSDTHREAD_CTL_SET_SELF: - return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval); - case BSDTHREAD_CTL_QOS_MAX_PARALLELISM: - return _bsdthread_ctl_max_parallelism(p, cmd, (int)arg1, (unsigned long)arg2, retval); - default: - return EINVAL; - } -} - -#pragma mark - Workqueue Implementation - -#pragma mark wq_flags - -static inline uint32_t -_wq_flags(struct workqueue *wq) -{ - return atomic_load_explicit(&wq->wq_flags, memory_order_relaxed); -} - -static inline bool -_wq_exiting(struct workqueue *wq) -{ - return _wq_flags(wq) & WQ_EXITING; -} - -static inline uint32_t -_wq_flags_or_orig(struct workqueue *wq, uint32_t v) -{ -#if PTHREAD_INLINE_RMW_ATOMICS - uint32_t state; - do { - state = _wq_flags(wq); - } while (!OSCompareAndSwap(state, state | v, &wq->wq_flags)); - return state; -#else - return atomic_fetch_or_explicit(&wq->wq_flags, v, memory_order_relaxed); -#endif -} - -static inline uint32_t -_wq_flags_and_orig(struct workqueue *wq, uint32_t v) -{ -#if PTHREAD_INLINE_RMW_ATOMICS - uint32_t state; - do { - state = _wq_flags(wq); - } while (!OSCompareAndSwap(state, state & v, &wq->wq_flags)); - return state; -#else - return atomic_fetch_and_explicit(&wq->wq_flags, v, memory_order_relaxed); -#endif -} - -static inline bool -WQ_TIMER_DELAYED_NEEDED(struct workqueue *wq) -{ - uint32_t oldflags, newflags; - do { - oldflags = _wq_flags(wq); - 
if (oldflags & (WQ_EXITING | WQ_ATIMER_DELAYED_RUNNING)) { - return false; - } - newflags = oldflags | WQ_ATIMER_DELAYED_RUNNING; - } while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags)); - return true; -} - -static inline bool -WQ_TIMER_IMMEDIATE_NEEDED(struct workqueue *wq) -{ - uint32_t oldflags, newflags; - do { - oldflags = _wq_flags(wq); - if (oldflags & (WQ_EXITING | WQ_ATIMER_IMMEDIATE_RUNNING)) { - return false; - } - newflags = oldflags | WQ_ATIMER_IMMEDIATE_RUNNING; - } while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags)); - return true; -} - -#pragma mark thread requests pacing - -static inline uint32_t -_wq_pacing_shift_for_pri(int pri) -{ - return _wq_bucket_to_thread_qos(pri) - 1; -} - -static inline int -_wq_highest_paced_priority(struct workqueue *wq) -{ - uint8_t paced = wq->wq_paced; - int msb = paced ? 32 - __builtin_clz(paced) : 0; // fls(paced) == bit + 1 - return WORKQUEUE_EVENT_MANAGER_BUCKET - msb; -} - -static inline uint8_t -_wq_pacing_bit_for_pri(int pri) -{ - return 1u << _wq_pacing_shift_for_pri(pri); -} - -static inline bool -_wq_should_pace_priority(struct workqueue *wq, int pri) -{ - return wq->wq_paced >= _wq_pacing_bit_for_pri(pri); -} - -static inline void -_wq_pacing_start(struct workqueue *wq, struct threadlist *tl) -{ - uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority); - assert((tl->th_flags & TH_LIST_PACING) == 0); - assert((wq->wq_paced & bit) == 0); - wq->wq_paced |= bit; - tl->th_flags |= TH_LIST_PACING; -} - -static inline bool -_wq_pacing_end(struct workqueue *wq, struct threadlist *tl) -{ - if (tl->th_flags & TH_LIST_PACING) { - uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority); - assert((wq->wq_paced & bit) != 0); - wq->wq_paced ^= bit; - tl->th_flags &= ~TH_LIST_PACING; - return wq->wq_paced < bit; // !_wq_should_pace_priority - } - return false; -} - -#pragma mark thread requests - -static void -_threadreq_init_alloced(struct threadreq *req, int priority, int flags) -{ - assert((flags & TR_FLAG_ONSTACK) == 0); - req->tr_state = TR_STATE_NEW; - req->tr_priority = priority; - req->tr_flags = flags; -} - -static void -_threadreq_init_stack(struct threadreq *req, int priority, int flags) -{ - req->tr_state = TR_STATE_NEW; - req->tr_priority = priority; - req->tr_flags = flags | TR_FLAG_ONSTACK; -} - -static void -_threadreq_copy_prepare(struct workqueue *wq) -{ -again: - if (wq->wq_cached_threadreq) { - return; - } - - workqueue_unlock(wq); - struct threadreq *req = zalloc(pthread_zone_threadreq); - workqueue_lock_spin(wq); - - if (wq->wq_cached_threadreq) { - /* - * We lost the race and someone left behind an extra threadreq for us - * to use. Throw away our request and retry. 
- */ - workqueue_unlock(wq); - zfree(pthread_zone_threadreq, req); - workqueue_lock_spin(wq); - goto again; - } else { - wq->wq_cached_threadreq = req; - } - - assert(wq->wq_cached_threadreq); -} - -static bool -_threadreq_copy_prepare_noblock(struct workqueue *wq) -{ - if (wq->wq_cached_threadreq) { - return true; - } - - wq->wq_cached_threadreq = zalloc_noblock(pthread_zone_threadreq); - - return wq->wq_cached_threadreq != NULL; -} - -static inline struct threadreq_head * -_threadreq_list_for_req(struct workqueue *wq, const struct threadreq *req) -{ - if (req->tr_flags & TR_FLAG_OVERCOMMIT) { - return &wq->wq_overcommit_reqlist[req->tr_priority]; - } else { - return &wq->wq_reqlist[req->tr_priority]; - } -} - -static void -_threadreq_enqueue(struct workqueue *wq, struct threadreq *req) -{ - assert(req && req->tr_state == TR_STATE_NEW); - if (req->tr_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { - assert(wq->wq_event_manager_threadreq.tr_state != TR_STATE_WAITING); - memcpy(&wq->wq_event_manager_threadreq, req, sizeof(struct threadreq)); - req = &wq->wq_event_manager_threadreq; - req->tr_flags &= ~(TR_FLAG_ONSTACK | TR_FLAG_NO_PACING); - } else { - if (req->tr_flags & TR_FLAG_ONSTACK) { - assert(wq->wq_cached_threadreq); - struct threadreq *newreq = wq->wq_cached_threadreq; - wq->wq_cached_threadreq = NULL; - - memcpy(newreq, req, sizeof(struct threadreq)); - newreq->tr_flags &= ~(TR_FLAG_ONSTACK | TR_FLAG_NO_PACING); - req->tr_state = TR_STATE_DEAD; - req = newreq; - } - TAILQ_INSERT_TAIL(_threadreq_list_for_req(wq, req), req, tr_entry); - } - req->tr_state = TR_STATE_WAITING; - wq->wq_reqcount++; -} - -static void -_threadreq_dequeue(struct workqueue *wq, struct threadreq *req) -{ - if (req->tr_priority != WORKQUEUE_EVENT_MANAGER_BUCKET) { - struct threadreq_head *req_list = _threadreq_list_for_req(wq, req); -#if DEBUG - struct threadreq *cursor = NULL; - TAILQ_FOREACH(cursor, req_list, tr_entry) { - if (cursor == req) break; - } - assert(cursor == req); -#endif - TAILQ_REMOVE(req_list, req, tr_entry); - } - wq->wq_reqcount--; -} - -/* - * Mark a thread request as complete. At this point, it is treated as owned by - * the submitting subsystem and you should assume it could be freed. - * - * Called with the workqueue lock held. - */ -static int -_threadreq_complete_and_unlock(proc_t p, struct workqueue *wq, - struct threadreq *req, struct threadlist *tl) -{ - struct threadreq *req_tofree = NULL; - bool sync = (req->tr_state == TR_STATE_NEW); - bool workloop = req->tr_flags & TR_FLAG_WORKLOOP; - bool onstack = req->tr_flags & TR_FLAG_ONSTACK; - bool kevent = req->tr_flags & TR_FLAG_KEVENT; - bool unbinding = tl->th_flags & TH_LIST_UNBINDING; - bool locked = true; - bool waking_parked_thread = (tl->th_flags & TH_LIST_BUSY); - int ret; - - req->tr_state = TR_STATE_COMPLETE; - - if (!workloop && !onstack && req != &wq->wq_event_manager_threadreq) { - if (wq->wq_cached_threadreq) { - req_tofree = req; - } else { - wq->wq_cached_threadreq = req; - } - } - - if (tl->th_flags & TH_LIST_UNBINDING) { - tl->th_flags &= ~TH_LIST_UNBINDING; - assert((tl->th_flags & TH_LIST_KEVENT_BOUND)); - } else if (workloop || kevent) { - assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0); - tl->th_flags |= TH_LIST_KEVENT_BOUND; - } - - if (workloop) { - workqueue_unlock(wq); - ret = pthread_kern->workloop_fulfill_threadreq(wq->wq_proc, (void*)req, - tl->th_thread, sync ? 
WORKLOOP_FULFILL_THREADREQ_SYNC : 0); - assert(ret == 0); - locked = false; - } else if (kevent) { - unsigned int kevent_flags = KEVENT_FLAG_WORKQ; - if (sync) { - kevent_flags |= KEVENT_FLAG_SYNCHRONOUS_BIND; - } - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { - kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER; - } - workqueue_unlock(wq); - ret = kevent_qos_internal_bind(wq->wq_proc, - class_index_get_thread_qos(tl->th_priority), tl->th_thread, - kevent_flags); - if (ret != 0) { - workqueue_lock_spin(wq); - tl->th_flags &= ~TH_LIST_KEVENT_BOUND; - locked = true; - } else { - locked = false; - } - } - - /* - * Run Thread, Run! - */ - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 0, 0, 0, 0); - PTHREAD_TRACE_WQ_REQ(TRACE_wq_runitem | DBG_FUNC_START, wq, req, tl->th_priority, - thread_tid(current_thread()), thread_tid(tl->th_thread)); - - if (waking_parked_thread) { - if (!locked) { - workqueue_lock_spin(wq); - } - tl->th_flags &= ~(TH_LIST_BUSY); - if ((tl->th_flags & TH_LIST_REMOVING_VOUCHER) == 0) { - /* - * If the thread is in the process of removing its voucher, then it - * isn't actually in the wait event yet and we don't need to wake - * it up. Save the trouble (and potential lock-ordering issues - * (see 30617015)). - */ - thread_wakeup_thread(tl, tl->th_thread); - } - workqueue_unlock(wq); - - if (req_tofree) zfree(pthread_zone_threadreq, req_tofree); - return WQ_RUN_TR_THREAD_STARTED; - } - - assert ((tl->th_flags & TH_LIST_PACING) == 0); - if (locked) { - workqueue_unlock(wq); - } - if (req_tofree) zfree(pthread_zone_threadreq, req_tofree); - if (unbinding) { - return WQ_RUN_TR_THREAD_STARTED; - } - _setup_wqthread(p, tl->th_thread, wq, tl, WQ_SETUP_CLEAR_VOUCHER); - pthread_kern->unix_syscall_return(EJUSTRETURN); - __builtin_unreachable(); -} - -/* - * Mark a thread request as cancelled. Has similar ownership semantics to the - * complete call above. - */ -static void -_threadreq_cancel(struct workqueue *wq, struct threadreq *req) -{ - assert(req->tr_state == TR_STATE_WAITING); - req->tr_state = TR_STATE_DEAD; - - assert((req->tr_flags & TR_FLAG_ONSTACK) == 0); - if (req->tr_flags & TR_FLAG_WORKLOOP) { - __assert_only int ret; - ret = pthread_kern->workloop_fulfill_threadreq(wq->wq_proc, (void*)req, - THREAD_NULL, WORKLOOP_FULFILL_THREADREQ_CANCEL); - assert(ret == 0 || ret == ECANCELED); - } else if (req != &wq->wq_event_manager_threadreq) { - zfree(pthread_zone_threadreq, req); - } -} - -#pragma mark workqueue lock - -static boolean_t workqueue_lock_spin_is_acquired_kdp(struct workqueue *wq) { - return kdp_lck_spin_is_acquired(&wq->wq_lock); -} - -static void -workqueue_lock_spin(struct workqueue *wq) -{ - assert(ml_get_interrupts_enabled() == TRUE); - lck_spin_lock(&wq->wq_lock); -} - -static bool -workqueue_lock_try(struct workqueue *wq) -{ - return lck_spin_try_lock(&wq->wq_lock); -} - -static void -workqueue_unlock(struct workqueue *wq) -{ - lck_spin_unlock(&wq->wq_lock); -} - -#pragma mark workqueue add timer - -/** - * Sets up the timer which will call out to workqueue_add_timer - */ -static void -workqueue_interval_timer_start(struct workqueue *wq) -{ - uint64_t deadline; - - /* n.b. wq_timer_interval is reset to 0 in workqueue_add_timer if the - ATIMER_RUNNING flag is not present. The net effect here is that if a - sequence of threads is required, we'll double the time before we give out - the next one. 
*/ - if (wq->wq_timer_interval == 0) { - wq->wq_timer_interval = wq_stalled_window_usecs; - - } else { - wq->wq_timer_interval = wq->wq_timer_interval * 2; - - if (wq->wq_timer_interval > wq_max_timer_interval_usecs) { - wq->wq_timer_interval = wq_max_timer_interval_usecs; - } - } - clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline); - - PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, - _wq_flags(wq), wq->wq_timer_interval, 0); - - thread_call_t call = wq->wq_atimer_delayed_call; - if (thread_call_enter1_delayed(call, call, deadline)) { - panic("delayed_call was already enqueued"); - } -} - -/** - * Immediately trigger the workqueue_add_timer - */ -static void -workqueue_interval_timer_trigger(struct workqueue *wq) -{ - PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, - _wq_flags(wq), 0, 0); - - thread_call_t call = wq->wq_atimer_immediate_call; - if (thread_call_enter1(call, call)) { - panic("immediate_call was already enqueued"); - } -} - -/** - * returns whether lastblocked_tsp is within wq_stalled_window_usecs of cur_ts - */ -static boolean_t -wq_thread_is_busy(uint64_t cur_ts, _Atomic uint64_t *lastblocked_tsp) -{ - clock_sec_t secs; - clock_usec_t usecs; - uint64_t lastblocked_ts; - uint64_t elapsed; - - lastblocked_ts = atomic_load_explicit(lastblocked_tsp, memory_order_relaxed); - if (lastblocked_ts >= cur_ts) { - /* - * because the update of the timestamp when a thread blocks isn't - * serialized against us looking at it (i.e. we don't hold the workq lock) - * it's possible to have a timestamp that matches the current time or - * that even looks to be in the future relative to when we grabbed the current - * time... just treat this as a busy thread since it must have just blocked. - */ - return (TRUE); - } - elapsed = cur_ts - lastblocked_ts; - - pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs); - - return (secs == 0 && usecs < wq_stalled_window_usecs); -} - -/** - * handler function for the timer - */ -static void -workqueue_add_timer(struct workqueue *wq, thread_call_t thread_call_self) -{ - proc_t p = wq->wq_proc; - - workqueue_lock_spin(wq); - - PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_START, wq, - _wq_flags(wq), wq->wq_nthreads, wq->wq_thidlecount, 0); - - /* - * There's two tricky issues here. - * - * First issue: we start the thread_call's that invoke this routine without - * the workqueue lock held. The scheduler callback needs to trigger - * reevaluation of the number of running threads but shouldn't take that - * lock, so we can't use it to synchronize state around the thread_call. - * As a result, it might re-enter the thread_call while this routine is - * already running. This could cause it to fire a second time and we'll - * have two add_timers running at once. Obviously, we don't want that to - * keep stacking, so we need to keep it at two timers. - * - * Solution: use wq_flags (accessed via atomic CAS) to synchronize the - * enqueue of the thread_call itself. When a thread needs to trigger the - * add_timer, it checks for ATIMER_DELAYED_RUNNING and, when not set, sets - * the flag then does a thread_call_enter. We'll then remove that flag - * only once we've got the lock and it's safe for the thread_call to be - * entered again. - * - * Second issue: we need to make sure that the two timers don't execute this - * routine concurrently. We can't use the workqueue lock for this because - * we'll need to drop it during our execution. 
- * - * Solution: use WQL_ATIMER_BUSY as a condition variable to indicate that - * we are currently executing the routine and the next thread should wait. - * - * After all that, we arrive at the following four possible states: - * !WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY no pending timer, no active timer - * !WQ_ATIMER_DELAYED_RUNNING && WQL_ATIMER_BUSY no pending timer, 1 active timer - * WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY 1 pending timer, no active timer - * WQ_ATIMER_DELAYED_RUNNING && WQL_ATIMER_BUSY 1 pending timer, 1 active timer - * - * A further complication: sometimes we need to trigger this function to run - * without delay. Because we aren't under a lock between setting - * WQ_ATIMER_DELAYED_RUNNING and calling thread_call_enter, we can't simply - * re-enter the thread call: if thread_call_enter() returned false, we - * wouldn't be able to distinguish the case where the thread_call had - * already fired from the case where it hadn't been entered yet from the - * other thread. So, we use a separate thread_call for immediate - * invocations, and a separate RUNNING flag, WQ_ATIMER_IMMEDIATE_RUNNING. - */ - - while (wq->wq_lflags & WQL_ATIMER_BUSY) { - wq->wq_lflags |= WQL_ATIMER_WAITING; - - assert_wait((caddr_t)wq, (THREAD_UNINT)); - workqueue_unlock(wq); - - thread_block(THREAD_CONTINUE_NULL); - - workqueue_lock_spin(wq); - } - /* - * Prevent _workqueue_mark_exiting() from going away - */ - wq->wq_lflags |= WQL_ATIMER_BUSY; - - /* - * Decide which timer we are and remove the RUNNING flag. - */ - if (thread_call_self == wq->wq_atimer_delayed_call) { - uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING); - if ((wq_flags & WQ_ATIMER_DELAYED_RUNNING) == 0) { - panic("workqueue_add_timer(delayed) w/o WQ_ATIMER_DELAYED_RUNNING"); - } - } else if (thread_call_self == wq->wq_atimer_immediate_call) { - uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING); - if ((wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) == 0) { - panic("workqueue_add_timer(immediate) w/o WQ_ATIMER_IMMEDIATE_RUNNING"); - } - } else { - panic("workqueue_add_timer can't figure out which timer it is"); - } - - int ret = WQ_RUN_TR_THREAD_STARTED; - while (ret == WQ_RUN_TR_THREAD_STARTED && wq->wq_reqcount) { - ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true); - - workqueue_lock_spin(wq); - } - _threadreq_copy_prepare(wq); - - /* - * If we called WQ_TIMER_NEEDED above, then this flag will be set if that - * call marked the timer running. If so, we let the timer interval grow. - * Otherwise, we reset it back to 0.
- */ - uint32_t wq_flags = _wq_flags(wq); - if (!(wq_flags & WQ_ATIMER_DELAYED_RUNNING)) { - wq->wq_timer_interval = 0; - } - - wq->wq_lflags &= ~WQL_ATIMER_BUSY; - - if ((wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) { - /* - * wakeup the thread hung up in _workqueue_mark_exiting or - * workqueue_add_timer waiting for this timer to finish getting out of - * the way - */ - wq->wq_lflags &= ~WQL_ATIMER_WAITING; - wakeup(wq); - } - - PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_END, wq, 0, wq->wq_nthreads, wq->wq_thidlecount, 0); - - workqueue_unlock(wq); -} - -#pragma mark thread state tracking - -// called by spinlock code when trying to yield to lock owner -void -_workqueue_thread_yielded(void) -{ -} - -static void -workqueue_callback(int type, thread_t thread) -{ - struct uthread *uth = pthread_kern->get_bsdthread_info(thread); - struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); - struct workqueue *wq = tl->th_workq; - uint32_t old_count, req_qos, qos = tl->th_priority; - wq_thactive_t old_thactive; - - switch (type) { - case SCHED_CALL_BLOCK: { - bool start_timer = false; - - old_thactive = _wq_thactive_dec(wq, tl->th_priority); - req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive); - old_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive, - qos, NULL, NULL); - - if (old_count == wq_max_concurrency[tl->th_priority]) { - /* - * The number of active threads at this priority has fallen below - * the maximum number of concurrent threads that are allowed to run - * - * if we collide with another thread trying to update the - * last_blocked (really unlikely since another thread would have to - * get scheduled and then block after we start down this path), it's - * not a problem. Either timestamp is adequate, so no need to retry - */ - atomic_store_explicit(&wq->wq_lastblocked_ts[qos], - mach_absolute_time(), memory_order_relaxed); - } - - if (req_qos == WORKQUEUE_EVENT_MANAGER_BUCKET || qos > req_qos) { - /* - * The blocking thread is at a lower QoS than the highest currently - * pending constrained request, nothing has to be redriven - */ - } else { - uint32_t max_busycount, old_req_count; - old_req_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive, - req_qos, NULL, &max_busycount); - /* - * If it is possible that may_start_constrained_thread had refused - * admission due to being over the max concurrency, we may need to - * spin up a new thread. - * - * We take into account the maximum number of busy threads - * that can affect may_start_constrained_thread as looking at the - * actual number may_start_constrained_thread will see is racy. - * - * IOW at NCPU = 4, for IN (req_qos = 1), if the old req count is - * between NCPU (4) and NCPU - 2 (2) we need to redrive. - */ - if (wq_max_concurrency[req_qos] <= old_req_count + max_busycount && - old_req_count <= wq_max_concurrency[req_qos]) { - if (WQ_TIMER_DELAYED_NEEDED(wq)) { - start_timer = true; - workqueue_interval_timer_start(wq); - } - } - } - - PTHREAD_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_START, wq, - old_count - 1, qos | (req_qos << 8), - wq->wq_reqcount << 1 | start_timer, 0); - break; - } - case SCHED_CALL_UNBLOCK: { - /* - * we cannot take the workqueue_lock here... - * an UNBLOCK can occur from a timer event which - * is run from an interrupt context... if the workqueue_lock - * is already held by this processor, we'll deadlock... 
- * the thread lock for the thread being UNBLOCKED - * is also held - */ - old_thactive = _wq_thactive_inc(wq, qos); - if (pthread_debug_tracing) { - req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive); - old_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive, - qos, NULL, NULL); - PTHREAD_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_END, wq, - old_count + 1, qos | (req_qos << 8), - wq->wq_threads_scheduled, 0); - } - break; - } - } -} - -sched_call_t -_workqueue_get_sched_callback(void) -{ - return workqueue_callback; -} - -#pragma mark thread addition/removal - -static mach_vm_size_t -_workqueue_allocsize(struct workqueue *wq) -{ - proc_t p = wq->wq_proc; - mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map); - mach_vm_size_t pthread_size = - vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map)); - return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size; -} - -/** - * pop goes the thread - * - * If fromexit is set, the call is from workqueue_exit(), - * so some cleanups are to be avoided. - */ -static void -workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use) -{ - struct uthread * uth; - struct workqueue * wq = tl->th_workq; - - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){ - TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry); - } else { - TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry); - } - - if (fromexit == 0) { - assert(wq->wq_nthreads && wq->wq_thidlecount); - wq->wq_nthreads--; - wq->wq_thidlecount--; - } - - /* - * Clear the threadlist pointer in uthread so that a - * blocked thread, on wakeup for termination, will - * not access the thread list as it is going to be - * freed. - */ - pthread_kern->thread_sched_call(tl->th_thread, NULL); - - uth = pthread_kern->get_bsdthread_info(tl->th_thread); - if (uth != (struct uthread *)0) { - pthread_kern->uthread_set_threadlist(uth, NULL); - } - if (fromexit == 0) { - /* during exit the lock is not held */ - workqueue_unlock(wq); - } - - if ( (tl->th_flags & TH_LIST_NEW) || first_use ) { - /* - * thread was created, but never used... - * need to clean up the stack and port ourselves - * since we're not going to spin up through the - * normal exit path triggered from Libc - */ - if (fromexit == 0) { - /* vm map is already deallocated when this is called from exit */ - (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, _workqueue_allocsize(wq)); - } - (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport); - } - /* - * drop our ref on the thread - */ - thread_deallocate(tl->th_thread); - - zfree(pthread_zone_threadlist, tl); -} - - -/** - * Try to add a new workqueue thread.
- * - * - called with workq lock held - * - dropped and retaken around thread creation - * - return with workq lock held - */ -static bool -workqueue_addnewthread(proc_t p, struct workqueue *wq) -{ - kern_return_t kret; - - wq->wq_nthreads++; - - workqueue_unlock(wq); - - struct threadlist *tl = zalloc(pthread_zone_threadlist); - bzero(tl, sizeof(struct threadlist)); - - thread_t th; - kret = pthread_kern->thread_create_workq_waiting(wq->wq_task, wq_unpark_continue, tl, &th); - if (kret != KERN_SUCCESS) { - PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 0, 0, 0); - goto fail_free; - } - - mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p); - - mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map); - mach_vm_size_t pthread_size = - vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map)); - mach_vm_size_t th_allocsize = guardsize + PTH_DEFAULT_STACKSIZE + pthread_size; - - kret = mach_vm_map(wq->wq_map, &stackaddr, - th_allocsize, page_size-1, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL, - 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, - VM_INHERIT_DEFAULT); - - if (kret != KERN_SUCCESS) { - kret = mach_vm_allocate(wq->wq_map, - &stackaddr, th_allocsize, - VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE); - } - - if (kret != KERN_SUCCESS) { - PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 1, 0, 0); - goto fail_terminate; - } - - /* - * The guard page is at the lowest address - * The stack base is the highest address - */ - kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE); - if (kret != KERN_SUCCESS) { - PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 2, 0, 0); - goto fail_vm_deallocate; - } - - - pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD | THREAD_TAG_WORKQUEUE); - pthread_kern->thread_static_param(th, TRUE); - - /* - * convert_thread_to_port() consumes a reference - */ - thread_reference(th); - void *sright = (void *)pthread_kern->convert_thread_to_port(th); - tl->th_thport = pthread_kern->ipc_port_copyout_send(sright, - pthread_kern->task_get_ipcspace(wq->wq_task)); - - tl->th_flags = TH_LIST_INITED | TH_LIST_NEW; - tl->th_thread = th; - tl->th_workq = wq; - tl->th_stackaddr = stackaddr; - tl->th_priority = WORKQUEUE_NUM_BUCKETS; - - struct uthread *uth; - uth = pthread_kern->get_bsdthread_info(tl->th_thread); - - workqueue_lock_spin(wq); - - void *current_tl = pthread_kern->uthread_get_threadlist(uth); - if (current_tl == NULL) { - pthread_kern->uthread_set_threadlist(uth, tl); - TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry); - wq->wq_thidlecount++; - } else if (current_tl == WQ_THREADLIST_EXITING_POISON) { - /* - * Failed thread creation race: The thread already woke up and has exited. 
- */ - PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 3, 0, 0); - goto fail_unlock; - } else { - panic("Unexpected initial threadlist value"); - } - - PTHREAD_TRACE_WQ(TRACE_wq_thread_create | DBG_FUNC_NONE, wq, 0, 0, 0, 0); - - return (TRUE); - -fail_unlock: - workqueue_unlock(wq); - (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), - tl->th_thport); - -fail_vm_deallocate: - (void) mach_vm_deallocate(wq->wq_map, stackaddr, th_allocsize); - -fail_terminate: - if (pthread_kern->thread_will_park_or_terminate) { - pthread_kern->thread_will_park_or_terminate(th); - } - (void)thread_terminate(th); - thread_deallocate(th); - -fail_free: - zfree(pthread_zone_threadlist, tl); - - workqueue_lock_spin(wq); - wq->wq_nthreads--; - - return (FALSE); -} - -/** - * Setup per-process state for the workqueue. - */ -int -_workq_open(struct proc *p, __unused int32_t *retval) -{ - struct workqueue * wq; - char * ptr; - uint32_t num_cpus; - int error = 0; - - if (pthread_kern->proc_get_register(p) == 0) { - return EINVAL; - } - - num_cpus = pthread_kern->ml_get_max_cpus(); - - if (wq_init_constrained_limit) { - uint32_t limit; - /* - * set up the limit for the constrained pool - * this is a virtual pool in that we don't - * maintain it on a separate idle and run list - */ - limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR; - - if (limit > wq_max_constrained_threads) - wq_max_constrained_threads = limit; - - wq_init_constrained_limit = 0; - - if (wq_max_threads > WQ_THACTIVE_BUCKET_HALF) { - wq_max_threads = WQ_THACTIVE_BUCKET_HALF; - } - if (wq_max_threads > pthread_kern->config_thread_max - 20) { - wq_max_threads = pthread_kern->config_thread_max - 20; - } - } - - if (pthread_kern->proc_get_wqptr(p) == NULL) { - if (pthread_kern->proc_init_wqptr_or_wait(p) == FALSE) { - assert(pthread_kern->proc_get_wqptr(p) != NULL); - goto out; - } - - ptr = (char *)zalloc(pthread_zone_workqueue); - bzero(ptr, sizeof(struct workqueue)); - - wq = (struct workqueue *)ptr; - wq->wq_proc = p; - wq->wq_task = current_task(); - wq->wq_map = pthread_kern->current_map(); - - // Start the event manager at the priority hinted at by the policy engine - int mgr_priority_hint = pthread_kern->task_get_default_manager_qos(current_task()); - wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(mgr_priority_hint) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - - TAILQ_INIT(&wq->wq_thrunlist); - TAILQ_INIT(&wq->wq_thidlelist); - for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) { - TAILQ_INIT(&wq->wq_overcommit_reqlist[i]); - TAILQ_INIT(&wq->wq_reqlist[i]); - } - - wq->wq_atimer_delayed_call = - thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer, - (thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL); - wq->wq_atimer_immediate_call = - thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer, - (thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL); - - lck_spin_init(&wq->wq_lock, pthread_lck_grp, pthread_lck_attr); - - wq->wq_cached_threadreq = zalloc(pthread_zone_threadreq); - *(wq_thactive_t *)&wq->wq_thactive = - (wq_thactive_t)WQ_THACTIVE_NO_PENDING_REQUEST << - WQ_THACTIVE_QOS_SHIFT; - - pthread_kern->proc_set_wqptr(p, wq); - - } -out: - - return(error); -} - -/* - * Routine: workqueue_mark_exiting - * - * Function: Mark the work queue such that new threads will not be added to the - * work queue after we return. - * - * Conditions: Called against the current process. 
- */ -void -_workqueue_mark_exiting(struct proc *p) -{ - struct workqueue *wq = pthread_kern->proc_get_wqptr(p); - if (!wq) return; - - PTHREAD_TRACE_WQ(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0); - - workqueue_lock_spin(wq); - - /* - * We arm the add timer without holding the workqueue lock so we need - * to synchronize with any running or soon to be running timers. - * - * Threads that intend to arm the timer atomically OR - * WQ_ATIMER_{DELAYED,IMMEDIATE}_RUNNING into the wq_flags, only if - * WQ_EXITING is not present. So, once we have set WQ_EXITING, we can - * be sure that no new RUNNING flags will be set, but still need to - * wait for the already running timers to complete. - * - * We always hold the workq lock when dropping WQ_ATIMER_RUNNING, so - * the check for and sleep until clear is protected. - */ - uint64_t wq_flags = _wq_flags_or_orig(wq, WQ_EXITING); - - if (wq_flags & WQ_ATIMER_DELAYED_RUNNING) { - if (thread_call_cancel(wq->wq_atimer_delayed_call) == TRUE) { - wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING); - } - } - if (wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) { - if (thread_call_cancel(wq->wq_atimer_immediate_call) == TRUE) { - wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING); - } - } - while ((_wq_flags(wq) & (WQ_ATIMER_DELAYED_RUNNING | WQ_ATIMER_IMMEDIATE_RUNNING)) || - (wq->wq_lflags & WQL_ATIMER_BUSY)) { - assert_wait((caddr_t)wq, (THREAD_UNINT)); - workqueue_unlock(wq); - - thread_block(THREAD_CONTINUE_NULL); - - workqueue_lock_spin(wq); - } - - /* - * Save off pending requests, will complete/free them below after unlocking - */ - TAILQ_HEAD(, threadreq) local_list = TAILQ_HEAD_INITIALIZER(local_list); - - for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) { - TAILQ_CONCAT(&local_list, &wq->wq_overcommit_reqlist[i], tr_entry); - TAILQ_CONCAT(&local_list, &wq->wq_reqlist[i], tr_entry); - } - - /* - * XXX: Can't deferred cancel the event manager request, so just smash it. - */ - assert((wq->wq_event_manager_threadreq.tr_flags & TR_FLAG_WORKLOOP) == 0); - wq->wq_event_manager_threadreq.tr_state = TR_STATE_DEAD; - - workqueue_unlock(wq); - - struct threadreq *tr, *tr_temp; - TAILQ_FOREACH_SAFE(tr, &local_list, tr_entry, tr_temp) { - _threadreq_cancel(wq, tr); - } - PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0); -} - -/* - * Routine: workqueue_exit - * - * Function: clean up the work queue structure(s) now that there are no threads - * left running inside the work queue (except possibly current_thread). - * - * Conditions: Called by the last thread in the process. - * Called against current process. - */ -void -_workqueue_exit(struct proc *p) -{ - struct workqueue * wq; - struct threadlist * tl, *tlist; - struct uthread *uth; - - wq = pthread_kern->proc_get_wqptr(p); - if (wq != NULL) { - - PTHREAD_TRACE_WQ(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0); - - pthread_kern->proc_set_wqptr(p, NULL); - - /* - * Clean up workqueue data structures for threads that exited and - * didn't get a chance to clean up after themselves. 
- */ - TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) { - assert((tl->th_flags & TH_LIST_RUNNING) != 0); - - pthread_kern->thread_sched_call(tl->th_thread, NULL); - - uth = pthread_kern->get_bsdthread_info(tl->th_thread); - if (uth != (struct uthread *)0) { - pthread_kern->uthread_set_threadlist(uth, NULL); - } - TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry); - - /* - * drop our last ref on the thread - */ - thread_deallocate(tl->th_thread); - - zfree(pthread_zone_threadlist, tl); - } - TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) { - assert((tl->th_flags & TH_LIST_RUNNING) == 0); - assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET); - workqueue_removethread(tl, true, false); - } - TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlemgrlist, th_entry, tlist) { - assert((tl->th_flags & TH_LIST_RUNNING) == 0); - assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET); - workqueue_removethread(tl, true, false); - } - if (wq->wq_cached_threadreq) { - zfree(pthread_zone_threadreq, wq->wq_cached_threadreq); - } - thread_call_free(wq->wq_atimer_delayed_call); - thread_call_free(wq->wq_atimer_immediate_call); - lck_spin_destroy(&wq->wq_lock, pthread_lck_grp); - - for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) { - assert(TAILQ_EMPTY(&wq->wq_overcommit_reqlist[i])); - assert(TAILQ_EMPTY(&wq->wq_reqlist[i])); - } - - zfree(pthread_zone_workqueue, wq); - - PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0); - } -} - - -#pragma mark workqueue thread manipulation - - -/** - * Entry point for libdispatch to ask for threads - */ -static int -wqops_queue_reqthreads(struct proc *p, int reqcount, - pthread_priority_t priority) -{ - bool overcommit = _pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; - bool event_manager = _pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - int class = event_manager ? WORKQUEUE_EVENT_MANAGER_BUCKET : - pthread_priority_get_class_index(priority); - - if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS) || - (overcommit && event_manager)) { - return EINVAL; - } - - struct workqueue *wq; - if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) { - return EINVAL; - } - - workqueue_lock_spin(wq); - _threadreq_copy_prepare(wq); - - PTHREAD_TRACE_WQ(TRACE_wq_wqops_reqthreads | DBG_FUNC_NONE, wq, reqcount, priority, 0, 0); - - int tr_flags = 0; - if (overcommit) tr_flags |= TR_FLAG_OVERCOMMIT; - if (reqcount > 1) { - /* - * when libdispatch asks for more than one thread, it wants to achieve - * parallelism. Pacing would be detrimental to this ask, so treat - * these specially to not do the pacing admission check - */ - tr_flags |= TR_FLAG_NO_PACING; - } - - while (reqcount-- && !_wq_exiting(wq)) { - struct threadreq req; - _threadreq_init_stack(&req, class, tr_flags); - - workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, true); - - workqueue_lock_spin(wq); /* reacquire */ - _threadreq_copy_prepare(wq); - } - - workqueue_unlock(wq); - - return 0; -} - -/* - * Used by the kevent system to request threads. - * - * Currently count is ignored and we always return one thread per invocation. 
- */ -static thread_t -_workq_kevent_reqthreads(struct proc *p, pthread_priority_t priority, - bool no_emergency) -{ - int wq_run_tr = WQ_RUN_TR_THROTTLED; - bool emergency_thread = false; - struct threadreq req; - - - struct workqueue *wq; - if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) { - return THREAD_NULL; - } - - int class = pthread_priority_get_class_index(priority); - - workqueue_lock_spin(wq); - bool has_threadreq = _threadreq_copy_prepare_noblock(wq); - - PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, NULL, priority, 0, 0); - - /* - * Skip straight to event manager if that's what was requested - */ - if ((_pthread_priority_get_qos_newest(priority) == QOS_CLASS_UNSPECIFIED) || - (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)){ - goto event_manager; - } - - bool will_pace = _wq_should_pace_priority(wq, class); - if ((wq->wq_thidlecount == 0 || will_pace) && has_threadreq == false) { - /* - * We'll need to persist the request and can't, so return the emergency - * thread instead, which has a persistent request object. - */ - emergency_thread = true; - goto event_manager; - } - - /* - * Handle overcommit requests - */ - if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){ - _threadreq_init_stack(&req, class, TR_FLAG_KEVENT | TR_FLAG_OVERCOMMIT); - wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false); - goto done; - } - - /* - * Handle constrained requests - */ - boolean_t may_start = may_start_constrained_thread(wq, class, NULL, false); - if (may_start || no_emergency) { - _threadreq_init_stack(&req, class, TR_FLAG_KEVENT); - wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false); - goto done; - } else { - emergency_thread = true; - } - - -event_manager: - _threadreq_init_stack(&req, WORKQUEUE_EVENT_MANAGER_BUCKET, TR_FLAG_KEVENT); - wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false); - -done: - if (wq_run_tr == WQ_RUN_TR_THREAD_NEEDED && WQ_TIMER_IMMEDIATE_NEEDED(wq)) { - workqueue_interval_timer_trigger(wq); - } - return emergency_thread ? 
(void*)-1 : 0; -} - -thread_t -_workq_reqthreads(struct proc *p, __assert_only int requests_count, - workq_reqthreads_req_t request) -{ - assert(requests_count == 1); - - pthread_priority_t priority = request->priority; - bool no_emergency = request->count & WORKQ_REQTHREADS_NOEMERGENCY; - - return _workq_kevent_reqthreads(p, priority, no_emergency); -} - - -int -workq_kern_threadreq(struct proc *p, workq_threadreq_t _req, - enum workq_threadreq_type type, unsigned long priority, int flags) -{ - struct workqueue *wq; - int ret; - - if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) { - return EINVAL; - } - - switch (type) { - case WORKQ_THREADREQ_KEVENT: { - bool no_emergency = flags & WORKQ_THREADREQ_FLAG_NOEMERGENCY; - (void)_workq_kevent_reqthreads(p, priority, no_emergency); - return 0; - } - case WORKQ_THREADREQ_WORKLOOP: - case WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL: { - struct threadreq *req = (struct threadreq *)_req; - int req_class = pthread_priority_get_class_index(priority); - int req_flags = TR_FLAG_WORKLOOP; - if ((_pthread_priority_get_flags(priority) & - _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){ - req_flags |= TR_FLAG_OVERCOMMIT; - } - - thread_t thread = current_thread(); - struct threadlist *tl = util_get_thread_threadlist_entry(thread); - - if (tl && tl != WQ_THREADLIST_EXITING_POISON && - (tl->th_flags & TH_LIST_UNBINDING)) { - /* - * we're called back synchronously from the context of - * kevent_qos_internal_unbind from within wqops_thread_return() - * we can try to match up this thread with this request ! - */ - } else { - tl = NULL; - } - - _threadreq_init_alloced(req, req_class, req_flags); - workqueue_lock_spin(wq); - PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, priority, 1, 0); - ret = workqueue_run_threadreq_and_unlock(p, wq, tl, req, false); - if (ret == WQ_RUN_TR_EXITING) { - return ECANCELED; - } - if (ret == WQ_RUN_TR_THREAD_NEEDED) { - if (type == WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL) { - return EAGAIN; - } - if (WQ_TIMER_IMMEDIATE_NEEDED(wq)) { - workqueue_interval_timer_trigger(wq); - } - } - return 0; - } - case WORKQ_THREADREQ_REDRIVE: - PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, 0, 0, 4, 0); - workqueue_lock_spin(wq); - ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true); - if (ret == WQ_RUN_TR_EXITING) { - return ECANCELED; - } - return 0; - default: - return ENOTSUP; - } -} - -int -workq_kern_threadreq_modify(struct proc *p, workq_threadreq_t _req, - enum workq_threadreq_op operation, unsigned long arg1, - unsigned long __unused arg2) -{ - struct threadreq *req = (struct threadreq *)_req; - struct workqueue *wq; - int priclass, ret = 0, wq_tr_rc = WQ_RUN_TR_THROTTLED; - - if (req == NULL || (wq = pthread_kern->proc_get_wqptr(p)) == NULL) { - return EINVAL; - } - - workqueue_lock_spin(wq); - - if (_wq_exiting(wq)) { - ret = ECANCELED; - goto out_unlock; - } - - /* - * Find/validate the referenced request structure - */ - if (req->tr_state != TR_STATE_WAITING) { - ret = EINVAL; - goto out_unlock; - } - assert(req->tr_priority < WORKQUEUE_EVENT_MANAGER_BUCKET); - assert(req->tr_flags & TR_FLAG_WORKLOOP); - - switch (operation) { - case WORKQ_THREADREQ_CHANGE_PRI: - case WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL: - priclass = pthread_priority_get_class_index(arg1); - PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, arg1, 2, 0); - if (req->tr_priority == priclass) { - goto out_unlock; - } - _threadreq_dequeue(wq, req); - req->tr_priority = 
priclass; - req->tr_state = TR_STATE_NEW; // what was old is new again - wq_tr_rc = workqueue_run_threadreq_and_unlock(p, wq, NULL, req, false); - goto out; - - case WORKQ_THREADREQ_CANCEL: - PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, 0, 3, 0); - _threadreq_dequeue(wq, req); - req->tr_state = TR_STATE_DEAD; - break; - - default: - ret = ENOTSUP; - break; - } - -out_unlock: - workqueue_unlock(wq); -out: - if (wq_tr_rc == WQ_RUN_TR_THREAD_NEEDED) { - if (operation == WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL) { - ret = EAGAIN; - } else if (WQ_TIMER_IMMEDIATE_NEEDED(wq)) { - workqueue_interval_timer_trigger(wq); - } - } - return ret; -} - - -static int -wqops_thread_return(struct proc *p, struct workqueue *wq) -{ - thread_t th = current_thread(); - struct uthread *uth = pthread_kern->get_bsdthread_info(th); - struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); - - /* reset signal mask on the workqueue thread to default state */ - if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) { - pthread_kern->proc_lock(p); - pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask); - pthread_kern->proc_unlock(p); - } - - if (wq == NULL || !tl) { - return EINVAL; - } - - PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_START, tl->th_workq, 0, 0, 0, 0); - - /* - * This squash call has neat semantics: it removes the specified overrides, - * replacing the current requested QoS with the previous effective QoS from - * those overrides. This means we won't be preempted due to having our QoS - * lowered. Of course, now our understanding of the thread's QoS is wrong, - * so we'll adjust below. - */ - bool was_manager = (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET); - int new_qos; - - if (!was_manager) { - new_qos = pthread_kern->proc_usynch_thread_qos_squash_override_for_resource(th, - THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD, - THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE); - } - - PTHREAD_TRACE_WQ(TRACE_wq_runitem | DBG_FUNC_END, wq, tl->th_priority, 0, 0, 0); - - workqueue_lock_spin(wq); - - if (tl->th_flags & TH_LIST_KEVENT_BOUND) { - unsigned int flags = KEVENT_FLAG_WORKQ; - if (was_manager) { - flags |= KEVENT_FLAG_WORKQ_MANAGER; - } - - tl->th_flags |= TH_LIST_UNBINDING; - workqueue_unlock(wq); - kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, flags); - if (!(tl->th_flags & TH_LIST_UNBINDING)) { - _setup_wqthread(p, th, wq, tl, WQ_SETUP_CLEAR_VOUCHER); - pthread_kern->unix_syscall_return(EJUSTRETURN); - __builtin_unreachable(); - } - workqueue_lock_spin(wq); - tl->th_flags &= ~(TH_LIST_KEVENT_BOUND | TH_LIST_UNBINDING); - } - - if (!was_manager) { - /* Fix up counters from the squash operation. 
*/ - uint8_t old_bucket = tl->th_priority; - uint8_t new_bucket = thread_qos_get_class_index(new_qos); - - if (old_bucket != new_bucket) { - _wq_thactive_move(wq, old_bucket, new_bucket); - wq->wq_thscheduled_count[old_bucket]--; - wq->wq_thscheduled_count[new_bucket]++; - - PTHREAD_TRACE_WQ(TRACE_wq_thread_squash | DBG_FUNC_NONE, wq, tl->th_priority, new_bucket, 0, 0); - tl->th_priority = new_bucket; - PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_END, tl->th_workq, new_qos, 0, 0, 0); - } - } - - workqueue_run_threadreq_and_unlock(p, wq, tl, NULL, false); - return 0; -} - -/** - * Multiplexed call to interact with the workqueue mechanism - */ -int -_workq_kernreturn(struct proc *p, - int options, - user_addr_t item, - int arg2, - int arg3, - int32_t *retval) -{ - struct workqueue *wq; - int error = 0; - - if (pthread_kern->proc_get_register(p) == 0) { - return EINVAL; - } - - switch (options) { - case WQOPS_QUEUE_NEWSPISUPP: { - /* - * arg2 = offset of serialno into dispatch queue - * arg3 = kevent support - */ - int offset = arg2; - if (arg3 & 0x01){ - // If we get here, then userspace has indicated support for kevent delivery. - } - - pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset); - break; - } - case WQOPS_QUEUE_REQTHREADS: { - /* - * arg2 = number of threads to start - * arg3 = priority - */ - error = wqops_queue_reqthreads(p, arg2, arg3); - break; - } - case WQOPS_SET_EVENT_MANAGER_PRIORITY: { - /* - * arg2 = priority for the manager thread - * - * if _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG is set, the - * ~_PTHREAD_PRIORITY_FLAGS_MASK contains a scheduling priority instead - * of a QOS value - */ - pthread_priority_t pri = arg2; - - wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p); - if (wq == NULL) { - error = EINVAL; - break; - } - workqueue_lock_spin(wq); - if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){ - /* - * If userspace passes a scheduling priority, that takes precedence - * over any QoS. (So, userspace should take care not to accidentally - * lower the priority this way.)
- */ - uint32_t sched_pri = pri & _PTHREAD_PRIORITY_SCHED_PRI_MASK; - if (wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){ - wq->wq_event_manager_priority = MAX(sched_pri, wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_MASK) - | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - } else { - wq->wq_event_manager_priority = sched_pri - | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - } - } else if ((wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){ - int cur_qos = pthread_priority_get_thread_qos(wq->wq_event_manager_priority); - int new_qos = pthread_priority_get_thread_qos(pri); - wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(MAX(cur_qos, new_qos)) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - } - workqueue_unlock(wq); - break; - } - case WQOPS_THREAD_KEVENT_RETURN: - case WQOPS_THREAD_WORKLOOP_RETURN: - wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p); - PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, options, 0, 0, 0); - if (item != 0 && arg2 != 0) { - int32_t kevent_retval; - int ret; - if (options == WQOPS_THREAD_KEVENT_RETURN) { - ret = kevent_qos_internal(p, -1, item, arg2, item, arg2, NULL, NULL, - KEVENT_FLAG_WORKQ | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS, - &kevent_retval); - } else /* options == WQOPS_THREAD_WORKLOOP_RETURN */ { - kqueue_id_t kevent_id = -1; - ret = kevent_id_internal(p, &kevent_id, item, arg2, item, arg2, - NULL, NULL, - KEVENT_FLAG_WORKLOOP | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS, - &kevent_retval); - } - /* - * We shouldn't be getting more errors out than events we put in, so - * reusing the input buffer should always provide enough space. But, - * the assert is commented out since we get errors in edge cases in the - * process lifecycle. - */ - //assert(ret == KERN_SUCCESS && kevent_retval >= 0); - if (ret != KERN_SUCCESS){ - error = ret; - break; - } else if (kevent_retval > 0){ - assert(kevent_retval <= arg2); - *retval = kevent_retval; - error = 0; - break; - } - } - goto thread_return; - - case WQOPS_THREAD_RETURN: - wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p); - PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, options, 0, 0, 0); - thread_return: - error = wqops_thread_return(p, wq); - // NOT REACHED except in case of error - assert(error); - break; - - case WQOPS_SHOULD_NARROW: { - /* - * arg2 = priority to test - * arg3 = unused - */ - pthread_priority_t priority = arg2; - thread_t th = current_thread(); - struct threadlist *tl = util_get_thread_threadlist_entry(th); - - if (tl == NULL || (tl->th_flags & TH_LIST_CONSTRAINED) == 0) { - error = EINVAL; - break; - } - - int class = pthread_priority_get_class_index(priority); - wq = tl->th_workq; - workqueue_lock_spin(wq); - bool should_narrow = !may_start_constrained_thread(wq, class, tl, false); - workqueue_unlock(wq); - - *retval = should_narrow; - break; - } - default: - error = EINVAL; - break; - } - - switch (options) { - case WQOPS_THREAD_KEVENT_RETURN: - case WQOPS_THREAD_WORKLOOP_RETURN: - case WQOPS_THREAD_RETURN: - PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, options, 0, 0, 0); - break; - } - return (error); -} - -/* - * We have no work to do, park ourselves on the idle list. - * - * Consumes the workqueue lock and does not return. 
- */ -static void __dead2 -parkit(struct workqueue *wq, struct threadlist *tl, thread_t thread) -{ - assert(thread == tl->th_thread); - assert(thread == current_thread()); - - PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_START, wq, 0, 0, 0, 0); - - uint32_t us_to_wait = 0; - - TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry); - - tl->th_flags &= ~TH_LIST_RUNNING; - tl->th_flags &= ~TH_LIST_KEVENT; - assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0); - - if (tl->th_flags & TH_LIST_CONSTRAINED) { - wq->wq_constrained_threads_scheduled--; - tl->th_flags &= ~TH_LIST_CONSTRAINED; - } - - _wq_thactive_dec(wq, tl->th_priority); - wq->wq_thscheduled_count[tl->th_priority]--; - wq->wq_threads_scheduled--; - uint32_t thidlecount = ++wq->wq_thidlecount; - - pthread_kern->thread_sched_call(thread, NULL); - - /* - * We'd like to always have one manager thread parked so that we can have - * low latency when we need to bring a manager thread up. If that idle - * thread list is empty, make this thread a manager thread. - * - * XXX: This doesn't check that there's not a manager thread outstanding, - * so it's based on the assumption that most manager callouts will change - * their QoS before parking. If that stops being true, this may end up - * costing us more than we gain. - */ - if (TAILQ_EMPTY(&wq->wq_thidlemgrlist) && - tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET){ - PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE, - wq, thread_tid(thread), - (tl->th_priority << 16) | WORKQUEUE_EVENT_MANAGER_BUCKET, 2, 0); - reset_priority(tl, pthread_priority_from_wq_class_index(wq, WORKQUEUE_EVENT_MANAGER_BUCKET)); - tl->th_priority = WORKQUEUE_EVENT_MANAGER_BUCKET; - } - - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){ - TAILQ_INSERT_HEAD(&wq->wq_thidlemgrlist, tl, th_entry); - } else { - TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry); - } - - /* - * When we remove the voucher from the thread, we may lose our importance - * causing us to get preempted, so we do this after putting the thread on - * the idle list. That way, when we get our importance back we'll be able - * to use this thread from e.g. the kevent call out to deliver a boosting - * message. - */ - tl->th_flags |= TH_LIST_REMOVING_VOUCHER; - workqueue_unlock(wq); - if (pthread_kern->thread_will_park_or_terminate) { - pthread_kern->thread_will_park_or_terminate(tl->th_thread); - } - __assert_only kern_return_t kr; - kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL); - assert(kr == KERN_SUCCESS); - workqueue_lock_spin(wq); - tl->th_flags &= ~(TH_LIST_REMOVING_VOUCHER); - - if ((tl->th_flags & TH_LIST_RUNNING) == 0) { - if (thidlecount < 101) { - us_to_wait = wq_reduce_pool_window_usecs - ((thidlecount-2) * (wq_reduce_pool_window_usecs / 100)); - } else { - us_to_wait = wq_reduce_pool_window_usecs / 100; - } - - thread_set_pending_block_hint(thread, kThreadWaitParkedWorkQueue); - assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE), - TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait, - wq_reduce_pool_window_usecs/10, NSEC_PER_USEC); - - workqueue_unlock(wq); - - thread_block(wq_unpark_continue); - panic("thread_block(wq_unpark_continue) returned!"); - } else { - workqueue_unlock(wq); - - /* - * While we'd dropped the lock to unset our voucher, someone came - * around and made us runnable. But because we weren't waiting on the - * event, their wakeup() was ineffectual. To correct for that, we just - * run the continuation ourselves.
- */ - wq_unpark_continue(NULL, THREAD_AWAKENED); - } -} - -static bool -may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass, - struct threadlist *tl, bool may_start_timer) -{ - uint32_t req_qos = _wq_thactive_best_constrained_req_qos(wq); - wq_thactive_t thactive; - - if (may_start_timer && at_priclass < req_qos) { - /* - * When called from workqueue_run_threadreq_and_unlock() pre-post newest - * higher priorities into the thactive state so that - * workqueue_callback() takes the right decision. - * - * If the admission check passes, workqueue_run_threadreq_and_unlock - * will reset this value before running the request. - */ - thactive = _wq_thactive_set_best_constrained_req_qos(wq, req_qos, - at_priclass); -#ifdef __LP64__ - PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 1, (uint64_t)thactive, - (uint64_t)(thactive >> 64), 0, 0); -#endif - } else { - thactive = _wq_thactive(wq); - } - - uint32_t constrained_threads = wq->wq_constrained_threads_scheduled; - if (tl && (tl->th_flags & TH_LIST_CONSTRAINED)) { - /* - * don't count the current thread as scheduled - */ - constrained_threads--; - } - if (constrained_threads >= wq_max_constrained_threads) { - PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 1, - wq->wq_constrained_threads_scheduled, - wq_max_constrained_threads, 0); - /* - * we need 1 or more constrained threads to return to the kernel before - * we can dispatch additional work - */ - return false; - } - - /* - * Compute a metric for many how many threads are active. We find the - * highest priority request outstanding and then add up the number of - * active threads in that and all higher-priority buckets. We'll also add - * any "busy" threads which are not active but blocked recently enough that - * we can't be sure they've gone idle yet. We'll then compare this metric - * to our max concurrency to decide whether to add a new thread. - */ - - uint32_t busycount, thactive_count; - - thactive_count = _wq_thactive_aggregate_downto_qos(wq, thactive, - at_priclass, &busycount, NULL); - - if (tl && tl->th_priority <= at_priclass) { - /* - * don't count this thread as currently active - */ - assert(thactive_count > 0); - thactive_count--; - } - - if (thactive_count + busycount < wq_max_concurrency[at_priclass]) { - PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 2, - thactive_count, busycount, 0); - return true; - } else { - PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 3, - thactive_count, busycount, 0); - } - - if (busycount && may_start_timer) { - /* - * If this is called from the add timer, we won't have another timer - * fire when the thread exits the "busy" state, so rearm the timer. 
- */ - if (WQ_TIMER_DELAYED_NEEDED(wq)) { - workqueue_interval_timer_start(wq); - } - } - - return false; -} - -static struct threadlist * -pop_from_thidlelist(struct workqueue *wq, uint32_t priclass) -{ - assert(wq->wq_thidlecount); - - struct threadlist *tl = NULL; - - if (!TAILQ_EMPTY(&wq->wq_thidlemgrlist) && - (priclass == WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlelist))){ - tl = TAILQ_FIRST(&wq->wq_thidlemgrlist); - TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry); - assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET); - } else if (!TAILQ_EMPTY(&wq->wq_thidlelist) && - (priclass != WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlemgrlist))){ - tl = TAILQ_FIRST(&wq->wq_thidlelist); - TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry); - assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET); - } else { - panic("pop_from_thidlelist called with no threads available"); - } - assert((tl->th_flags & TH_LIST_RUNNING) == 0); - - assert(wq->wq_thidlecount); - wq->wq_thidlecount--; - - TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry); - - tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY; - - wq->wq_threads_scheduled++; - wq->wq_thscheduled_count[priclass]++; - _wq_thactive_inc(wq, priclass); - return tl; -} - -static pthread_priority_t -pthread_priority_from_wq_class_index(struct workqueue *wq, int index) -{ - if (index == WORKQUEUE_EVENT_MANAGER_BUCKET){ - return wq->wq_event_manager_priority; - } else { - return class_index_get_pthread_priority(index); - } -} - -static void -reset_priority(struct threadlist *tl, pthread_priority_t pri) -{ - kern_return_t ret; - thread_t th = tl->th_thread; - - if ((pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){ - ret = pthread_kern->thread_set_workq_qos(th, pthread_priority_get_thread_qos(pri), 0); - assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); - - if (tl->th_flags & TH_LIST_EVENT_MGR_SCHED_PRI) { - - /* Reset priority to default (masked by QoS) */ - - ret = pthread_kern->thread_set_workq_pri(th, 31, POLICY_TIMESHARE); - assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); - - tl->th_flags &= ~TH_LIST_EVENT_MGR_SCHED_PRI; - } - } else { - ret = pthread_kern->thread_set_workq_qos(th, THREAD_QOS_UNSPECIFIED, 0); - assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); - ret = pthread_kern->thread_set_workq_pri(th, (pri & (~_PTHREAD_PRIORITY_FLAGS_MASK)), POLICY_TIMESHARE); - assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); - - tl->th_flags |= TH_LIST_EVENT_MGR_SCHED_PRI; - } -} - -/* - * Picks the best request to run, and returns the best overcommit fallback - * if the best pick is non overcommit and risks failing its admission check. 
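The constrained-thread admission check above boils down to two limits: a global cap on constrained threads, and a per-class comparison of active-plus-busy threads against the class's max concurrency. A minimal sketch of that decision, using hypothetical sketch_* names and simplified fields in place of the real workqueue state (bucket 0 taken as the highest QoS, matching the selection loop below):

#include <stdbool.h>
#include <stdint.h>

#define SKETCH_NUM_BUCKETS 6

struct wq_sketch {
	uint32_t constrained_scheduled;          /* cf. wq_constrained_threads_scheduled */
	uint32_t max_constrained;                /* cf. wq_max_constrained_threads */
	uint32_t active[SKETCH_NUM_BUCKETS];     /* active threads per QoS bucket */
	uint32_t busy[SKETCH_NUM_BUCKETS];       /* recently-blocked ("busy") threads */
	uint32_t max_concurrency[SKETCH_NUM_BUCKETS];
};

/* at_priclass must be a valid constrained bucket index (< SKETCH_NUM_BUCKETS). */
static bool
sketch_may_start_constrained(const struct wq_sketch *wq, uint32_t at_priclass)
{
	if (wq->constrained_scheduled >= wq->max_constrained) {
		return false;                    /* global constrained-thread cap reached */
	}
	uint32_t thactive = 0, busy = 0;
	for (uint32_t i = 0; i <= at_priclass; i++) {
		thactive += wq->active[i];       /* this bucket and all higher-priority ones */
		busy += wq->busy[i];
	}
	/* admit only while we stay under the per-class concurrency limit */
	return (thactive + busy) < wq->max_concurrency[at_priclass];
}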
- */ -static struct threadreq * -workqueue_best_threadreqs(struct workqueue *wq, struct threadlist *tl, - struct threadreq **fallback) -{ - struct threadreq *req, *best_req = NULL; - int priclass, prilimit; - - if ((wq->wq_event_manager_threadreq.tr_state == TR_STATE_WAITING) && - ((wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0) || - (tl && tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) { - /* - * There's an event manager request and either: - * - no event manager currently running - * - we are re-using the event manager - */ - req = &wq->wq_event_manager_threadreq; - PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, req, 1, 0, 0); - return req; - } - - if (tl) { - prilimit = WORKQUEUE_EVENT_MANAGER_BUCKET; - } else { - prilimit = _wq_highest_paced_priority(wq); - } - for (priclass = 0; priclass < prilimit; priclass++) { - req = TAILQ_FIRST(&wq->wq_overcommit_reqlist[priclass]); - if (req) { - PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, req, 2, 0, 0); - if (best_req) { - *fallback = req; - } else { - best_req = req; - } - break; - } - if (!best_req) { - best_req = TAILQ_FIRST(&wq->wq_reqlist[priclass]); - if (best_req) { - PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, best_req, 3, 0, 0); - } - } - } - return best_req; -} - -/** - * Runs a thread request on a thread - * - * - if thread is THREAD_NULL, will find a thread and run the request there. - * Otherwise, the thread must be the current thread. - * - * - if req is NULL, will find the highest priority request and run that. If - * it is not NULL, it must be a threadreq object in state NEW. If it can not - * be run immediately, it will be enqueued and moved to state WAITING. - * - * Either way, the thread request object serviced will be moved to state - * PENDING and attached to the threadlist. - * - * Should be called with the workqueue lock held. Will drop it. - * - * WARNING: _workq_kevent_reqthreads needs to be able to preflight any - * admission checks in this function. If you are changing this function, - * keep that one up-to-date. - * - * - if parking_tl is non NULL, then the current thread is parking. This will - * try to reuse this thread for a request. If no match is found, it will be - * parked. - */ -static int -workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq, - struct threadlist *parking_tl, struct threadreq *req, - bool may_add_new_thread) -{ - struct threadreq *incoming_req = req; - - struct threadlist *tl = parking_tl; - int rc = WQ_RUN_TR_THROTTLED; - - assert(tl == NULL || tl->th_thread == current_thread()); - assert(req == NULL || req->tr_state == TR_STATE_NEW); - assert(!may_add_new_thread || !tl); - - PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq | DBG_FUNC_START, wq, req, - tl ? thread_tid(tl->th_thread) : 0, - req ? (req->tr_priority << 16 | req->tr_flags) : 0, 0); - - /* - * Special cases when provided an event manager request - */ - if (req && req->tr_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { - // Clients must not rely on identity of event manager requests - assert(req->tr_flags & TR_FLAG_ONSTACK); - // You can't be both overcommit and event manager - assert((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0); - - /* - * We can only ever have one event manager request, so coalesce them if - * there's already one outstanding. 
- */ - if (wq->wq_event_manager_threadreq.tr_state == TR_STATE_WAITING) { - PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_mgr_merge | DBG_FUNC_NONE, wq, req, 0, 0, 0); - - struct threadreq *existing_req = &wq->wq_event_manager_threadreq; - if (req->tr_flags & TR_FLAG_KEVENT) { - existing_req->tr_flags |= TR_FLAG_KEVENT; - } - - req = existing_req; - incoming_req = NULL; - } - - if (wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] && - (!tl || tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET)){ - /* - * There can only be one event manager running at a time. - */ - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 1, 0, 0, 0); - goto done; - } - } - -again: // Start again after creating a thread - - if (_wq_exiting(wq)) { - rc = WQ_RUN_TR_EXITING; - goto exiting; - } - - /* - * Thread request selection and admission control - */ - struct threadreq *fallback = NULL; - if (req) { - if ((req->tr_flags & TR_FLAG_NO_PACING) == 0 && - _wq_should_pace_priority(wq, req->tr_priority)) { - /* - * If a request fails the pacing admission check, then thread - * requests are redriven when the pacing thread is finally scheduled - * when it calls _wq_pacing_end() in wq_unpark_continue(). - */ - goto done; - } - } else if (wq->wq_reqcount == 0) { - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 2, 0, 0, 0); - goto done; - } else if ((req = workqueue_best_threadreqs(wq, tl, &fallback)) == NULL) { - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 3, 0, 0, 0); - goto done; - } - - if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0 && - (req->tr_priority < WORKQUEUE_EVENT_MANAGER_BUCKET)) { - if (!may_start_constrained_thread(wq, req->tr_priority, parking_tl, true)) { - if (!fallback) { - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 4, 0, 0, 0); - goto done; - } - assert(req->tr_state == TR_STATE_WAITING); - req = fallback; - } - } - - /* - * Thread selection. - */ - if (parking_tl) { - if (tl->th_priority != req->tr_priority) { - _wq_thactive_move(wq, tl->th_priority, req->tr_priority); - wq->wq_thscheduled_count[tl->th_priority]--; - wq->wq_thscheduled_count[req->tr_priority]++; - } - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE, - wq, 1, thread_tid(tl->th_thread), 0, 0); - } else if (wq->wq_thidlecount) { - tl = pop_from_thidlelist(wq, req->tr_priority); - /* - * This call will update wq_thscheduled_count and wq_thactive_count for - * the provided priority. It will not set the returned thread to that - * priority. This matches the behavior of the parking_tl clause above. - */ - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE, - wq, 2, thread_tid(tl->th_thread), 0, 0); - } else /* no idle threads */ { - if (!may_add_new_thread || wq->wq_nthreads >= wq_max_threads) { - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 5, - may_add_new_thread, wq->wq_nthreads, 0); - if (wq->wq_nthreads < wq_max_threads) { - rc = WQ_RUN_TR_THREAD_NEEDED; - } - goto done; - } - - bool added_thread = workqueue_addnewthread(p, wq); - /* - * workqueue_addnewthread will drop and re-take the lock, so we - * need to ensure we still have a cached request. - * - * It also means we have to pick a new request, since our old pick may - * not be valid anymore. 
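The thread-selection fallback order implemented above can be summarized compactly. A sketch with hypothetical sketch_* names and simplified stand-in types; the real code additionally fixes up per-bucket accounting and, as noted in the comment above, re-validates the request after creating a thread:

#include <stdbool.h>
#include <stddef.h>

struct tl_sketch;                        /* stand-in for struct threadlist */

struct wq_sel_sketch {
	struct tl_sketch *parking_thread;    /* thread currently parking, if any */
	unsigned idle_count;                 /* cf. wq_thidlecount */
	unsigned nthreads, max_threads;      /* cf. wq_nthreads / wq_max_threads */
};

enum sel_result { SEL_REUSE_PARKING, SEL_POP_IDLE, SEL_CREATE_NEW, SEL_NONE };

static enum sel_result
sketch_select_thread(const struct wq_sel_sketch *wq, bool may_add_new_thread)
{
	if (wq->parking_thread != NULL) {
		return SEL_REUSE_PARKING;        /* cheapest: reuse the parking thread */
	}
	if (wq->idle_count > 0) {
		return SEL_POP_IDLE;             /* next: take one off the idle list */
	}
	if (may_add_new_thread && wq->nthreads < wq->max_threads) {
		return SEL_CREATE_NEW;           /* last resort: grow the pool and retry */
	}
	return SEL_NONE;                     /* caller reports thread-needed / throttled */
}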
- */ - req = incoming_req; - if (req && (req->tr_flags & TR_FLAG_ONSTACK)) { - _threadreq_copy_prepare(wq); - } - - if (added_thread) { - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE, - wq, 3, 0, 0, 0); - goto again; - } else if (_wq_exiting(wq)) { - rc = WQ_RUN_TR_EXITING; - goto exiting; - } else { - PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 6, 0, 0, 0); - /* - * Something caused thread creation to fail. Kick off the timer in - * the hope that it'll succeed next time. - */ - if (WQ_TIMER_DELAYED_NEEDED(wq)) { - workqueue_interval_timer_start(wq); - } - goto done; - } - } - - /* - * Setup thread, mark request as complete and run with it. - */ - if (req->tr_state == TR_STATE_WAITING) { - _threadreq_dequeue(wq, req); - } - if (tl->th_priority != req->tr_priority) { - PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE, - wq, thread_tid(tl->th_thread), - (tl->th_priority << 16) | req->tr_priority, 1, 0); - reset_priority(tl, pthread_priority_from_wq_class_index(wq, req->tr_priority)); - tl->th_priority = (uint8_t)req->tr_priority; - } - if (req->tr_flags & TR_FLAG_OVERCOMMIT) { - if ((tl->th_flags & TH_LIST_CONSTRAINED) != 0) { - tl->th_flags &= ~TH_LIST_CONSTRAINED; - wq->wq_constrained_threads_scheduled--; - } - } else { - if ((tl->th_flags & TH_LIST_CONSTRAINED) == 0) { - tl->th_flags |= TH_LIST_CONSTRAINED; - wq->wq_constrained_threads_scheduled++; - } - } - - if (!parking_tl && !(req->tr_flags & TR_FLAG_NO_PACING)) { - _wq_pacing_start(wq, tl); - } - if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0) { - uint32_t old_qos, new_qos; - - /* - * If we are scheduling a constrained thread request, we may need to - * update the best constrained qos in the thactive atomic state. - */ - for (new_qos = 0; new_qos < WQ_THACTIVE_NO_PENDING_REQUEST; new_qos++) { - if (TAILQ_FIRST(&wq->wq_reqlist[new_qos])) - break; - } - old_qos = _wq_thactive_best_constrained_req_qos(wq); - if (old_qos != new_qos) { - wq_thactive_t v = _wq_thactive_set_best_constrained_req_qos(wq, - old_qos, new_qos); -#ifdef __LP64__ - PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 2, (uint64_t)v, - (uint64_t)(v >> 64), 0, 0); -#else - PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 2, v, 0, 0, 0); -#endif - } - } - { - uint32_t upcall_flags = WQ_FLAG_THREAD_NEWSPI; - if (req->tr_flags & TR_FLAG_OVERCOMMIT) - upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; - if (req->tr_flags & TR_FLAG_KEVENT) - upcall_flags |= WQ_FLAG_THREAD_KEVENT; - if (req->tr_flags & TR_FLAG_WORKLOOP) - upcall_flags |= WQ_FLAG_THREAD_WORKLOOP | WQ_FLAG_THREAD_KEVENT; - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) - upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER; - tl->th_upcall_flags = upcall_flags >> WQ_FLAG_THREAD_PRIOSHIFT; - } - if (req->tr_flags & TR_FLAG_KEVENT) { - tl->th_flags |= TH_LIST_KEVENT; - } else { - tl->th_flags &= ~TH_LIST_KEVENT; - } - return _threadreq_complete_and_unlock(p, wq, req, tl); - -done: - if (incoming_req) { - _threadreq_enqueue(wq, incoming_req); - } - -exiting: - - if (parking_tl && !(parking_tl->th_flags & TH_LIST_UNBINDING)) { - parkit(wq, parking_tl, parking_tl->th_thread); - __builtin_unreachable(); - } - - workqueue_unlock(wq); - - return rc; -} - -/** - * parked thread wakes up - */ -static void __dead2 -wq_unpark_continue(void* __unused ptr, wait_result_t wait_result) -{ - boolean_t first_use = false; - thread_t th = current_thread(); - proc_t p = current_proc(); - - struct uthread *uth = pthread_kern->get_bsdthread_info(th); - if (uth == NULL) goto done; - - struct workqueue 
*wq = pthread_kern->proc_get_wqptr(p); - if (wq == NULL) goto done; - - workqueue_lock_spin(wq); - - struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth); - assert(tl != WQ_THREADLIST_EXITING_POISON); - if (tl == NULL) { - /* - * We woke up before addnewthread() was finished setting us up. Go - * ahead and exit, but before we do poison the threadlist variable so - * that addnewthread() doesn't think we are valid still. - */ - pthread_kern->uthread_set_threadlist(uth, WQ_THREADLIST_EXITING_POISON); - workqueue_unlock(wq); - goto done; - } - - assert(tl->th_flags & TH_LIST_INITED); - - if ((tl->th_flags & TH_LIST_NEW)){ - tl->th_flags &= ~(TH_LIST_NEW); - first_use = true; - } - - if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) { - /* - * The normal wakeup path. - */ - goto return_to_user; - } - - if ((tl->th_flags & TH_LIST_RUNNING) == 0 && - wait_result == THREAD_TIMED_OUT && - tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET && - TAILQ_FIRST(&wq->wq_thidlemgrlist) == tl && - TAILQ_NEXT(tl, th_entry) == NULL){ - /* - * If we are the only idle manager and we pop'ed for self-destruction, - * then don't actually exit. Instead, free our stack to save some - * memory and re-park. - */ - - workqueue_unlock(wq); - - vm_map_t vmap = wq->wq_map; - - // Keep this in sync with _setup_wqthread() - const vm_size_t guardsize = vm_map_page_size(vmap); - const user_addr_t freeaddr = (user_addr_t)tl->th_stackaddr + guardsize; - const vm_map_offset_t freesize = vm_map_trunc_page_mask((PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1, vm_map_page_mask(vmap)) - guardsize; - - __assert_only int kr = mach_vm_behavior_set(vmap, freeaddr, freesize, VM_BEHAVIOR_REUSABLE); -#if MACH_ASSERT - if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) { - os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr); - } -#endif - - workqueue_lock_spin(wq); - - if ( !(tl->th_flags & TH_LIST_RUNNING)) { - thread_set_pending_block_hint(th, kThreadWaitParkedWorkQueue); - assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE)); - - workqueue_unlock(wq); - - thread_block(wq_unpark_continue); - __builtin_unreachable(); - } - } - - if ((tl->th_flags & TH_LIST_RUNNING) == 0) { - assert((tl->th_flags & TH_LIST_BUSY) == 0); - if (!first_use) { - PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, 0, 0, 0, 0); - } - /* - * We were set running, but not for the purposes of actually running. - * This could be because the timer elapsed. Or it could be because the - * thread aborted. Either way, we need to return to userspace to exit. - * - * The call to workqueue_removethread will consume the lock. - */ - - if (!first_use && - (tl->th_priority < qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS) || - (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) { - // Reset the QoS to something low for the pthread cleanup - PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE, - wq, thread_tid(th), - (tl->th_priority << 16) | qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS), 3, 0); - pthread_priority_t cleanup_pri = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0); - reset_priority(tl, cleanup_pri); - } - - workqueue_removethread(tl, 0, first_use); - - if (first_use){ - pthread_kern->thread_bootstrap_return(); - } else { - pthread_kern->unix_syscall_return(0); - } - __builtin_unreachable(); - } - - /* - * The timer woke us up or the thread was aborted. However, we have - * already started to make this a runnable thread. 
Wait for that to - * finish, then continue to userspace. - */ - while ((tl->th_flags & TH_LIST_BUSY)) { - assert_wait((caddr_t)tl, (THREAD_UNINT)); - - workqueue_unlock(wq); - - thread_block(THREAD_CONTINUE_NULL); - - workqueue_lock_spin(wq); - } - -return_to_user: - if (!first_use) { - PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, 0, 0, 0, 0); - } - if (_wq_pacing_end(wq, tl) && wq->wq_reqcount) { - workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true); - } else { - workqueue_unlock(wq); - } - _setup_wqthread(p, th, wq, tl, first_use ? WQ_SETUP_FIRST_USE : 0); - pthread_kern->thread_sched_call(th, workqueue_callback); -done: - if (first_use){ - pthread_kern->thread_bootstrap_return(); - } else { - pthread_kern->unix_syscall_return(EJUSTRETURN); - } - panic("Our attempt to return to userspace failed..."); -} + if (kevent_data_available == WQ_KEVENT_DATA_SIZE) { + workq_thread_set_top_addr(th_addrs, kevent_id_addr); + } else { + workq_thread_set_top_addr(th_addrs, + kevent_data_buf + kevent_data_available); + } + *kevent_count_out = kevent_count; + *kevent_list_out = kevent_list; + return ret; +} /** * configures initial thread stack/registers to jump into: @@ -3787,282 +840,90 @@ done: * |guard page | guardsize * |-----------| th_stackaddr */ +__attribute__((noreturn,noinline)) void -_setup_wqthread(proc_t p, thread_t th, struct workqueue *wq, - struct threadlist *tl, int setup_flags) +workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr, + mach_port_name_t kport, int th_qos __unused, int setup_flags, int upcall_flags) { - int error; - if (setup_flags & WQ_SETUP_CLEAR_VOUCHER) { - /* - * For preemption reasons, we want to reset the voucher as late as - * possible, so we do it in two places: - * - Just before parking (i.e. in parkit()) - * - Prior to doing the setup for the next workitem (i.e. here) - * - * Those two places are sufficient to ensure we always reset it before - * it goes back out to user space, but be careful to not break that - * guarantee. - */ - __assert_only kern_return_t kr; - kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL); - assert(kr == KERN_SUCCESS); - } - - uint32_t upcall_flags = tl->th_upcall_flags << WQ_FLAG_THREAD_PRIOSHIFT; - if (!(setup_flags & WQ_SETUP_FIRST_USE)) { - upcall_flags |= WQ_FLAG_THREAD_REUSE; - } - - /* - * Put the QoS class value into the lower bits of the reuse_thread register, this is where - * the thread priority used to be stored anyway. - */ - pthread_priority_t priority = pthread_priority_from_wq_class_index(wq, tl->th_priority); - upcall_flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK); - - const vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map); - const vm_size_t stack_gap_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_REDZONE_LEN; - const vm_size_t stack_align_min = (proc_is64bit(p) == 0) ? 
C_32_STK_ALIGN : C_64_STK_ALIGN; - - user_addr_t pthread_self_addr = (user_addr_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET); - user_addr_t stack_top_addr = (user_addr_t)((pthread_self_addr - stack_gap_min) & -stack_align_min); - user_addr_t stack_bottom_addr = (user_addr_t)(tl->th_stackaddr + guardsize); + struct workq_thread_addrs th_addrs; + bool first_use = (setup_flags & WQ_SETUP_FIRST_USE); + user_addr_t kevent_list = NULL; + int kevent_count = 0; - user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p); - if (!wqstart_fnptr) { - panic("workqueue thread start function pointer is NULL"); - } + workq_thread_get_addrs(map, stackaddr, &th_addrs); - if (setup_flags & WQ_SETUP_FIRST_USE) { + if (first_use) { uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p); if (tsd_offset) { - mach_vm_offset_t th_tsd_base = (mach_vm_offset_t)pthread_self_addr + tsd_offset; - kern_return_t kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base); + mach_vm_offset_t th_tsd_base = th_addrs.self + tsd_offset; + kern_return_t kret = pthread_kern->thread_set_tsd_base(th, + th_tsd_base); if (kret == KERN_SUCCESS) { upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET; } } /* - * Pre-fault the first page of the new thread's stack and the page that will - * contain the pthread_t structure. - */ - vm_map_t vmap = pthread_kern->current_map(); - if (vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) != - vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap))){ - vm_fault( vmap, - vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); + * Pre-fault the first page of the new thread's stack and the page that will + * contain the pthread_t structure. + */ + vm_map_offset_t mask = vm_map_page_mask(map); + vm_map_offset_t th_page = vm_map_trunc_page_mask(th_addrs.self, mask); + vm_map_offset_t stk_page = vm_map_trunc_page_mask(th_addrs.stack_top - 1, mask); + if (th_page != stk_page) { + vm_fault(map, stk_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0); } - vm_fault( vmap, - vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap)), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); + vm_fault(map, th_page, VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0); } - user_addr_t kevent_list = NULL; - int kevent_count = 0; - if (upcall_flags & WQ_FLAG_THREAD_KEVENT){ - bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP; - - kevent_list = pthread_self_addr - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s); - kevent_count = WQ_KEVENT_LIST_LEN; - - user_addr_t kevent_id_addr = kevent_list; - if (workloop) { - /* - * The kevent ID goes just below the kevent list. Sufficiently new - * userspace will know to look there. Old userspace will just - * ignore it. 
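The address arithmetic above stacks the kevent buffers directly below the pthread_t. A sketch of how those addresses nest, treating the named constants (PTH_DEFAULT_STACKSIZE, PTHREAD_T_OFFSET, WQ_KEVENT_LIST_LEN, WQ_KEVENT_DATA_SIZE) as opaque inputs and assuming the alignment value is a power of two; the sketch_* names are hypothetical:

#include <stdint.h>
#include <stddef.h>

struct wq_layout_sketch {
	uint64_t pthread_self;     /* pthread_t lives at the very top */
	uint64_t stack_bottom;     /* just above the guard page */
	uint64_t kevent_list;      /* array of kevent_qos_s below the pthread_t */
	uint64_t kevent_id;        /* kqueue_id_t just below the list (workloops) */
	uint64_t kevent_data_buf;  /* stack-data buffer below the id slot */
	uint64_t stack_top;        /* aligned top handed to the new thread */
};

static uint64_t
sketch_align_down(uint64_t addr, uint64_t gap, uint64_t align)
{
	return (addr - gap) & ~(align - 1);   /* same effect as (addr - gap) & -align */
}

static struct wq_layout_sketch
sketch_layout(uint64_t stackaddr, uint64_t guardsize, uint64_t stacksize,
    uint64_t pthread_t_offset, uint64_t kevent_list_len, uint64_t kevent_sz,
    uint64_t kevent_data_size, uint64_t gap, uint64_t align)
{
	struct wq_layout_sketch l;
	l.pthread_self    = stackaddr + stacksize + guardsize + pthread_t_offset;
	l.stack_bottom    = stackaddr + guardsize;
	l.kevent_list     = l.pthread_self - kevent_list_len * kevent_sz;
	l.kevent_id       = l.kevent_list - sizeof(uint64_t);   /* kqueue_id_t slot */
	l.kevent_data_buf = l.kevent_id - kevent_data_size;
	/* With no kevent data returned, the stack top sits just under the id slot. */
	l.stack_top       = sketch_align_down(l.kevent_id, gap, align);
	return l;
}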
- */ - kevent_id_addr -= sizeof(kqueue_id_t); - } - - user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE; - user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE; - - int32_t events_out = 0; - - assert(tl->th_flags | TH_LIST_KEVENT_BOUND); + if (setup_flags & WQ_SETUP_EXIT_THREAD) { + kevent_count = WORKQ_EXIT_THREAD_NKEVENT; + } else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) { unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE; - if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) { - flags |= KEVENT_FLAG_WORKQ_MANAGER; - } - int ret = 0; - if (workloop) { - flags |= KEVENT_FLAG_WORKLOOP; - kqueue_id_t kevent_id = -1; - ret = kevent_id_internal(p, &kevent_id, - NULL, 0, kevent_list, kevent_count, - kevent_data_buf, &kevent_data_available, - flags, &events_out); - copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id)); - } else { - flags |= KEVENT_FLAG_WORKQ; - ret = kevent_qos_internal(p, - class_index_get_thread_qos(tl->th_priority), - NULL, 0, kevent_list, kevent_count, - kevent_data_buf, &kevent_data_available, - flags, &events_out); - } - - // squash any errors into just empty output - if (ret != KERN_SUCCESS || events_out == -1){ - events_out = 0; - kevent_data_available = WQ_KEVENT_DATA_SIZE; - } - - // We shouldn't get data out if there aren't events available - assert(events_out != 0 || kevent_data_available == WQ_KEVENT_DATA_SIZE); - - if (events_out > 0){ - if (kevent_data_available == WQ_KEVENT_DATA_SIZE){ - stack_top_addr = (kevent_id_addr - stack_gap_min) & -stack_align_min; - } else { - stack_top_addr = (kevent_data_buf + kevent_data_available - stack_gap_min) & -stack_align_min; - } - - kevent_count = events_out; - } else { - kevent_list = NULL; - kevent_count = 0; - } + workq_kevent(p, &th_addrs, upcall_flags, NULL, 0, flags, + &kevent_list, &kevent_count); } - PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, 0, 0, 0, 0); - -#if defined(__i386__) || defined(__x86_64__) - if (proc_is64bit(p) == 0) { - x86_thread_state32_t state = { - .eip = (unsigned int)wqstart_fnptr, - .eax = /* arg0 */ (unsigned int)pthread_self_addr, - .ebx = /* arg1 */ (unsigned int)tl->th_thport, - .ecx = /* arg2 */ (unsigned int)stack_bottom_addr, - .edx = /* arg3 */ (unsigned int)kevent_list, - .edi = /* arg4 */ (unsigned int)upcall_flags, - .esi = /* arg5 */ (unsigned int)kevent_count, - - .esp = (int)((vm_offset_t)stack_top_addr), - }; + workq_set_register_state(p, th, &th_addrs, kport, + kevent_list, upcall_flags, kevent_count); - error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); - if (error != KERN_SUCCESS) { - panic(__func__ ": thread_set_wq_state failed: %d", error); - } + if (first_use) { + pthread_kern->thread_bootstrap_return(); } else { - x86_thread_state64_t state64 = { - // x86-64 already passes all the arguments in registers, so we just put them in their final place here - .rip = (uint64_t)wqstart_fnptr, - .rdi = (uint64_t)pthread_self_addr, - .rsi = (uint64_t)tl->th_thport, - .rdx = (uint64_t)stack_bottom_addr, - .rcx = (uint64_t)kevent_list, - .r8 = (uint64_t)upcall_flags, - .r9 = (uint64_t)kevent_count, - - .rsp = (uint64_t)(stack_top_addr) - }; - - error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64); - if (error != KERN_SUCCESS) { - panic(__func__ ": thread_set_wq_state failed: %d", error); - } + pthread_kern->unix_syscall_return(EJUSTRETURN); } -#else -#error setup_wqthread not defined for this architecture -#endif -} - -#if DEBUG -static int wq_kevent_test SYSCTL_HANDLER_ARGS { - //(struct 
sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) -#pragma unused(oidp, arg1, arg2) - int error; - struct workq_reqthreads_req_s requests[64] = {}; - - if (req->newlen > sizeof(requests) || req->newlen < sizeof(struct workq_reqthreads_req_s)) - return EINVAL; - - error = copyin(req->newptr, requests, req->newlen); - if (error) return error; - - _workq_reqthreads(req->p, (int)(req->newlen / sizeof(struct workq_reqthreads_req_s)), requests); - - return 0; + __builtin_unreachable(); } -#endif // DEBUG - -#pragma mark - Misc int -_fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo) +workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map, + user_addr_t stackaddr, mach_port_name_t kport, + user_addr_t events, int nevents, int upcall_flags) { - struct workqueue * wq; - int error = 0; - int activecount; - - if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) { - return EINVAL; - } - - /* - * This is sometimes called from interrupt context by the kperf sampler. - * In that case, it's not safe to spin trying to take the lock since we - * might already hold it. So, we just try-lock it and error out if it's - * already held. Since this is just a debugging aid, and all our callers - * are able to handle an error, that's fine. - */ - bool locked = workqueue_lock_try(wq); - if (!locked) { - return EBUSY; - } - - activecount = _wq_thactive_aggregate_downto_qos(wq, _wq_thactive(wq), - WORKQUEUE_NUM_BUCKETS - 1, NULL, NULL); - pwqinfo->pwq_nthreads = wq->wq_nthreads; - pwqinfo->pwq_runthreads = activecount; - pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount; - pwqinfo->pwq_state = 0; - - if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { - pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT; - } - - if (wq->wq_nthreads >= wq_max_threads) { - pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT; - } - - workqueue_unlock(wq); - return(error); -} + struct workq_thread_addrs th_addrs; + user_addr_t kevent_list = NULL; + int kevent_count = 0, error; + __assert_only kern_return_t kr; -uint32_t -_get_pwq_state_kdp(proc_t p) -{ - if (p == NULL) { - return 0; - } + workq_thread_get_addrs(map, stackaddr, &th_addrs); - struct workqueue *wq = pthread_kern->proc_get_wqptr(p); + unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE | + KEVENT_FLAG_PARKING; + error = workq_kevent(p, &th_addrs, upcall_flags, events, nevents, flags, + &kevent_list, &kevent_count); - if (wq == NULL || workqueue_lock_spin_is_acquired_kdp(wq)) { - return 0; + if (error || kevent_count == 0) { + return error; } - uint32_t pwq_state = WQ_FLAGS_AVAILABLE; - - if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { - pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT; - } + kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL); + assert(kr == KERN_SUCCESS); - if (wq->wq_nthreads >= wq_max_threads) { - pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT; - } + workq_set_register_state(p, th, &th_addrs, kport, + kevent_list, upcall_flags, kevent_count); - return pwq_state; + pthread_kern->unix_syscall_return(EJUSTRETURN); + __builtin_unreachable(); } int @@ -4083,44 +944,16 @@ _pthread_init(void) * allocate the lock attribute for pthread synchronizers */ pthread_lck_attr = lck_attr_alloc_init(); - pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr); pth_global_hashinit(); psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL); psynch_zoneinit(); - pthread_zone_workqueue = zinit(sizeof(struct workqueue), - 
1024 * sizeof(struct workqueue), 8192, "pthread.workqueue"); - pthread_zone_threadlist = zinit(sizeof(struct threadlist), - 1024 * sizeof(struct threadlist), 8192, "pthread.threadlist"); - pthread_zone_threadreq = zinit(sizeof(struct threadreq), - 1024 * sizeof(struct threadreq), 8192, "pthread.threadreq"); - int policy_bootarg; if (PE_parse_boot_argn("pthread_mutex_default_policy", &policy_bootarg, sizeof(policy_bootarg))) { pthread_mutex_default_policy = policy_bootarg; } - /* - * register sysctls - */ - sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs); - sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs); - sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs); - sysctl_register_oid(&sysctl__kern_wq_max_threads); - sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads); - sysctl_register_oid(&sysctl__kern_pthread_debug_tracing); sysctl_register_oid(&sysctl__kern_pthread_mutex_default_policy); - -#if DEBUG - sysctl_register_oid(&sysctl__debug_wq_kevent_test); -#endif - - for (int i = 0; i < WORKQUEUE_NUM_BUCKETS; i++) { - uint32_t thread_qos = _wq_bucket_to_thread_qos(i); - wq_max_concurrency[i] = pthread_kern->qos_max_parallelism(thread_qos, - QOS_PARALLELISM_COUNT_LOGICAL); - } - wq_max_concurrency[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1; } diff --git a/kern/kern_synch.c b/kern/kern_synch.c index 217ddcb..7dabe41 100644 --- a/kern/kern_synch.c +++ b/kern/kern_synch.c @@ -69,6 +69,7 @@ #include #include #include +#include //#include #include #include @@ -82,7 +83,6 @@ #include #include -#include #include "kern_internal.h" #include "synch_internal.h" @@ -92,9 +92,7 @@ typedef struct uthread *uthread_t; //#define __FAILEDUSERTEST__(s) do { panic(s); } while (0) #define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0) - -#define ECVCERORR 256 -#define ECVPERORR 512 +#define __FAILEDUSERTEST2__(s, x...) 
do { printf("PSYNCH: pid[%d]: " s "\n", proc_pid(current_proc()), x); } while (0) lck_mtx_t *pthread_list_mlock; @@ -119,17 +117,23 @@ struct ksyn_queue { }; typedef struct ksyn_queue *ksyn_queue_t; -enum { +typedef enum { KSYN_QUEUE_READ = 0, - KSYN_QUEUE_WRITER, + KSYN_QUEUE_WRITE, KSYN_QUEUE_MAX, -}; +} kwq_queue_type_t; + +typedef enum { + KWQ_INTR_NONE = 0, + KWQ_INTR_READ = 0x1, + KWQ_INTR_WRITE = 0x2, +} kwq_intr_type_t; struct ksyn_wait_queue { LIST_ENTRY(ksyn_wait_queue) kw_hash; LIST_ENTRY(ksyn_wait_queue) kw_list; user_addr_t kw_addr; - uint64_t kw_owner; + thread_t kw_owner; /* current owner or THREAD_NULL, has a +1 */ uint64_t kw_object; /* object backing in shared mode */ uint64_t kw_offset; /* offset inside the object in shared mode */ int kw_pflags; /* flags under listlock protection */ @@ -151,19 +155,23 @@ struct ksyn_wait_queue { uint32_t kw_lastseqword; /* the last seq that unlocked */ /* for mutex and cvar we need to track I bit values */ uint32_t kw_nextseqword; /* the last seq that unlocked; with num of waiters */ - uint32_t kw_overlapwatch; /* chance for overlaps */ - uint32_t kw_pre_rwwc; /* prepost count */ - uint32_t kw_pre_lockseq; /* prepost target seq */ - uint32_t kw_pre_sseq; /* prepost target sword, in cvar used for mutexowned */ - uint32_t kw_pre_intrcount; /* prepost of missed wakeup due to intrs */ - uint32_t kw_pre_intrseq; /* prepost of missed wakeup limit seq */ - uint32_t kw_pre_intrretbits; /* return bits value for missed wakeup threads */ - uint32_t kw_pre_intrtype; /* type of failed wakueps*/ + struct { + uint32_t count; /* prepost count */ + uint32_t lseq; /* prepost target seq */ + uint32_t sseq; /* prepost target sword, in cvar used for mutexowned */ + } kw_prepost; + struct { + kwq_intr_type_t type; /* type of failed wakueps */ + uint32_t count; /* prepost of missed wakeup due to intrs */ + uint32_t seq; /* prepost of missed wakeup limit seq */ + uint32_t returnbits; /* return bits value for missed wakeup threads */ + } kw_intr; int kw_kflags; int kw_qos_override; /* QoS of max waiter during contention period */ + struct turnstile *kw_turnstile; struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX]; /* queues to hold threads */ - lck_mtx_t kw_lock; /* mutex lock protecting this structure */ + lck_spin_t kw_lock; /* spinlock protecting this structure */ }; typedef struct ksyn_wait_queue * ksyn_wait_queue_t; @@ -189,14 +197,9 @@ typedef struct ksyn_wait_queue * ksyn_wait_queue_t; /* * Mutex policy attributes */ -#define _PTHREAD_MUTEX_POLICY_NONE 0 -#define _PTHREAD_MUTEX_POLICY_FAIRSHARE 0x040 /* 1 */ -#define _PTHREAD_MUTEX_POLICY_FIRSTFIT 0x080 /* 2 */ -#define _PTHREAD_MUTEX_POLICY_REALTIME 0x0c0 /* 3 */ -#define _PTHREAD_MUTEX_POLICY_ADAPTIVE 0x100 /* 4 */ -#define _PTHREAD_MUTEX_POLICY_PRIPROTECT 0x140 /* 5 */ -#define _PTHREAD_MUTEX_POLICY_PRIINHERIT 0x180 /* 6 */ -#define PTHREAD_POLICY_FLAGS_MASK 0x1c0 +#define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE 0x040 /* 1 */ +#define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT 0x080 /* 2 */ +#define _PTHREAD_MTX_OPT_POLICY_MASK 0x1c0 /* pflags */ #define KSYN_WQ_INHASH 2 @@ -205,9 +208,10 @@ typedef struct ksyn_wait_queue * ksyn_wait_queue_t; #define KSYN_WQ_FLIST 0X10 /* in free list to be freed after a short delay */ /* kflags */ -#define KSYN_KWF_INITCLEARED 1 /* the init status found and preposts cleared */ -#define KSYN_KWF_ZEROEDOUT 2 /* the lword, etc are inited to 0 */ -#define KSYN_KWF_QOS_APPLIED 4 /* QoS override applied to owner */ +#define KSYN_KWF_INITCLEARED 0x1 /* the init status found and preposts 
cleared */ +#define KSYN_KWF_ZEROEDOUT 0x2 /* the lword, etc are inited to 0 */ +#define KSYN_KWF_QOS_APPLIED 0x4 /* QoS override applied to owner */ +#define KSYN_KWF_OVERLAP_GUARD 0x8 /* overlap guard */ #define KSYN_CLEANUP_DEADLINE 10 static int psynch_cleanupset; @@ -223,47 +227,24 @@ thread_call_t psynch_thcall; #define KSYN_WQTYPE_MUTEXDROP (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX) -#define KW_UNLOCK_PREPOST 0x01 -#define KW_UNLOCK_PREPOST_READLOCK 0x08 -#define KW_UNLOCK_PREPOST_WRLOCK 0x20 - -static void -CLEAR_PREPOST_BITS(ksyn_wait_queue_t kwq) +static inline int +_kwq_type(ksyn_wait_queue_t kwq) { - kwq->kw_pre_lockseq = 0; - kwq->kw_pre_sseq = PTHRW_RWS_INIT; - kwq->kw_pre_rwwc = 0; + return (kwq->kw_type & KSYN_WQTYPE_MASK); } -static void -CLEAR_INTR_PREPOST_BITS(ksyn_wait_queue_t kwq) +static inline bool +_kwq_use_turnstile(ksyn_wait_queue_t kwq) { - kwq->kw_pre_intrcount = 0; - kwq->kw_pre_intrseq = 0; - kwq->kw_pre_intrretbits = 0; - kwq->kw_pre_intrtype = 0; + // If we had writer-owner information from the + // rwlock then we could use the turnstile to push on it. For now, only + // plain mutexes use it. + return (_kwq_type(kwq) == KSYN_WQTYPE_MTX); } -static void -CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq) -{ - if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) { - if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) { - panic("CV:entries in queue durinmg reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount); - } - }; - if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) { - kwq->kw_nextseqword = PTHRW_RWS_INIT; - kwq->kw_overlapwatch = 0; - }; - CLEAR_PREPOST_BITS(kwq); - kwq->kw_lastunlockseq = PTHRW_RWL_INIT; - kwq->kw_lastseqword = PTHRW_RWS_INIT; - CLEAR_INTR_PREPOST_BITS(kwq); - kwq->kw_lword = 0; - kwq->kw_uword = 0; - kwq->kw_sword = PTHRW_RWS_INIT; -} +#define KW_UNLOCK_PREPOST 0x01 +#define KW_UNLOCK_PREPOST_READLOCK 0x08 +#define KW_UNLOCK_PREPOST_WRLOCK 0x20 static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t *object, uint64_t *offset); static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype , ksyn_wait_queue_t *wq); @@ -272,13 +253,11 @@ static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp) static int _wait_result_to_errno(wait_result_t result); -static int ksyn_wait(ksyn_wait_queue_t, int, uint32_t, int, uint64_t, thread_continue_t, block_hint_t); -static kern_return_t ksyn_signal(ksyn_wait_queue_t, int, ksyn_waitq_element_t, uint32_t); +static int ksyn_wait(ksyn_wait_queue_t, kwq_queue_type_t, uint32_t, int, uint64_t, uint16_t, thread_continue_t, block_hint_t); +static kern_return_t ksyn_signal(ksyn_wait_queue_t, kwq_queue_type_t, ksyn_waitq_element_t, uint32_t); static void ksyn_freeallkwe(ksyn_queue_t kq); -static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t); -static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t, uint64_t tid, boolean_t prepost); -static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t); +static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t, thread_t *); static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen); @@ -299,8 +278,10 @@ static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep); 
static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq); -static void psynch_cvcontinue(void *, wait_result_t); -static void psynch_mtxcontinue(void *, wait_result_t); +static void __dead2 psynch_cvcontinue(void *, wait_result_t); +static void __dead2 psynch_mtxcontinue(void *, wait_result_t); +static void __dead2 psynch_rw_rdcontinue(void *, wait_result_t); +static void __dead2 psynch_rw_wrcontinue(void *, wait_result_t); static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp); static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]); @@ -335,6 +316,196 @@ UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc } } +static inline void +_kwq_clear_preposted_wakeup(ksyn_wait_queue_t kwq) +{ + kwq->kw_prepost.lseq = 0; + kwq->kw_prepost.sseq = PTHRW_RWS_INIT; + kwq->kw_prepost.count = 0; +} + +static inline void +_kwq_mark_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t count, + uint32_t lseq, uint32_t sseq) +{ + kwq->kw_prepost.count = count; + kwq->kw_prepost.lseq = lseq; + kwq->kw_prepost.sseq = sseq; +} + +static inline void +_kwq_clear_interrupted_wakeup(ksyn_wait_queue_t kwq) +{ + kwq->kw_intr.type = KWQ_INTR_NONE; + kwq->kw_intr.count = 0; + kwq->kw_intr.seq = 0; + kwq->kw_intr.returnbits = 0; +} + +static inline void +_kwq_mark_interruped_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type, + uint32_t count, uint32_t lseq, uint32_t returnbits) +{ + kwq->kw_intr.count = count; + kwq->kw_intr.seq = lseq; + kwq->kw_intr.returnbits = returnbits; + kwq->kw_intr.type = type; +} + +static void +_kwq_destroy(ksyn_wait_queue_t kwq) +{ + if (kwq->kw_owner) { + thread_deallocate(kwq->kw_owner); + } + lck_spin_destroy(&kwq->kw_lock, pthread_lck_grp); + zfree(kwq_zone, kwq); +} + +#define KWQ_SET_OWNER_TRANSFER_REF 0x1 + +static inline thread_t +_kwq_set_owner(ksyn_wait_queue_t kwq, thread_t new_owner, int flags) +{ + thread_t old_owner = kwq->kw_owner; + if (old_owner == new_owner) { + if (flags & KWQ_SET_OWNER_TRANSFER_REF) return new_owner; + return THREAD_NULL; + } + if ((flags & KWQ_SET_OWNER_TRANSFER_REF) == 0) { + thread_reference(new_owner); + } + kwq->kw_owner = new_owner; + return old_owner; +} + +static inline thread_t +_kwq_clear_owner(ksyn_wait_queue_t kwq) +{ + return _kwq_set_owner(kwq, THREAD_NULL, KWQ_SET_OWNER_TRANSFER_REF); +} + +static inline void +_kwq_cleanup_old_owner(thread_t *thread) +{ + if (*thread) { + thread_deallocate(*thread); + *thread = THREAD_NULL; + } +} + +static void +CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq) +{ + if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) { + if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) { + panic("CV:entries in queue durinmg reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount); + } + }; + if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) { + kwq->kw_nextseqword = PTHRW_RWS_INIT; + kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD; + }; + _kwq_clear_preposted_wakeup(kwq); + kwq->kw_lastunlockseq = PTHRW_RWL_INIT; + kwq->kw_lastseqword = PTHRW_RWS_INIT; + _kwq_clear_interrupted_wakeup(kwq); + kwq->kw_lword = 0; + kwq->kw_uword = 0; + kwq->kw_sword = PTHRW_RWS_INIT; +} + +static bool +_kwq_handle_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t type, + uint32_t lseq, uint32_t *retval) +{ + if (kwq->kw_prepost.count == 0 || + !is_seqlower_eq(lseq, kwq->kw_prepost.lseq)) { + return false; + } + 
+ kwq->kw_prepost.count--; + if (kwq->kw_prepost.count > 0) { + return false; + } + + int error, should_block = 0; + uint32_t updatebits = 0; + uint32_t pp_lseq = kwq->kw_prepost.lseq; + uint32_t pp_sseq = kwq->kw_prepost.sseq; + _kwq_clear_preposted_wakeup(kwq); + + kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED; + + error = kwq_handle_unlock(kwq, pp_lseq, pp_sseq, &updatebits, + (type | KW_UNLOCK_PREPOST), &should_block, lseq); + if (error) { + panic("_kwq_handle_preposted_wakeup: kwq_handle_unlock failed %d", + error); + } + + if (should_block) { + return false; + } + *retval = updatebits; + return true; +} + +static bool +_kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t type, uint32_t lgenval, + uint32_t rw_wc, uint32_t *retval) +{ + int res = 0; + + // overlaps only occur on read lockers + if (type != PTH_RW_TYPE_READ) { + return false; + } + + // check for overlap and no pending W bit (indicates writers) + if ((kwq->kw_kflags & KSYN_KWF_OVERLAP_GUARD) && + !is_rws_savemask_set(rw_wc) && !is_rwl_wbit_set(lgenval)) { + /* overlap is set, so no need to check for valid state for overlap */ + + if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) { + /* increase the next expected seq by one */ + kwq->kw_nextseqword += PTHRW_INC; + /* set count by one & bits from the nextseq and add M bit */ + *retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT); + res = 1; + } + } + return res; +} + +static inline bool +_kwq_is_used(ksyn_wait_queue_t kwq) +{ + return (kwq->kw_inqueue != 0 || kwq->kw_prepost.count != 0 || + kwq->kw_intr.count != 0); +} + +/* + * consumes a pending interrupted waiter, returns true if the current + * thread should return back to userspace because it was previously + * interrupted. 
+ */ +static inline bool +_kwq_handle_interrupted_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type, + uint32_t lseq, uint32_t *retval) +{ + if (kwq->kw_intr.count != 0 && kwq->kw_intr.type == type && + (!kwq->kw_intr.seq || is_seqlower_eq(lseq, kwq->kw_intr.seq))) { + kwq->kw_intr.count--; + *retval = kwq->kw_intr.returnbits; + if (kwq->kw_intr.returnbits == 0) { + _kwq_clear_interrupted_wakeup(kwq); + } + return true; + } + return false; +} + static void pthread_list_lock(void) { @@ -350,98 +521,117 @@ pthread_list_unlock(void) static void ksyn_wqlock(ksyn_wait_queue_t kwq) { - - lck_mtx_lock(&kwq->kw_lock); + lck_spin_lock(&kwq->kw_lock); } static void ksyn_wqunlock(ksyn_wait_queue_t kwq) { - lck_mtx_unlock(&kwq->kw_lock); + lck_spin_unlock(&kwq->kw_lock); } - /* routine to drop the mutex unlocks , used both for mutexunlock system call and drop during cond wait */ static uint32_t -_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, int flags) +_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, + int flags) { kern_return_t ret; uint32_t returnbits = 0; - int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT; + uint32_t updatebits = 0; + int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) == + _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; uint32_t nextgen = (ugen + PTHRW_INC); + thread_t old_owner = THREAD_NULL; ksyn_wqlock(kwq); kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK); - uint32_t updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT); redrive: + updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | + (PTH_RWL_EBIT | PTH_RWL_KBIT); + if (firstfit) { if (kwq->kw_inqueue == 0) { - // not set or the new lock sequence is higher - if (kwq->kw_pre_rwwc == 0 || is_seqhigher(mgen, kwq->kw_pre_lockseq)) { - kwq->kw_pre_lockseq = (mgen & PTHRW_COUNT_MASK); - } - kwq->kw_pre_rwwc = 1; - ksyn_mtx_drop_qos_override(kwq); - kwq->kw_owner = 0; - // indicate prepost content in kernel - returnbits = mgen | PTH_RWL_PBIT; + uint32_t count = kwq->kw_prepost.count + 1; + // Increment the number of preposters we have waiting + _kwq_mark_preposted_wakeup(kwq, count, mgen & PTHRW_COUNT_MASK, 0); + // We don't know the current owner as we've determined this mutex + // drop should have a preposted locker inbound into the kernel but + // we have no way of knowing who it is. When it arrives, the lock + // path will update the turnstile owner and return it to userspace. + old_owner = _kwq_clear_owner(kwq); + pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL, + &kwq->kw_turnstile); + PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr, + kwq->kw_prepost.lseq, count, 0); } else { // signal first waiter - ret = ksyn_mtxsignal(kwq, NULL, updatebits); + ret = ksyn_mtxsignal(kwq, NULL, updatebits, &old_owner); if (ret == KERN_NOT_WAITING) { + // ksyn_mtxsignal attempts to signal + // the thread but it sets up the turnstile inheritor first. + // That means we can't redrive the mutex in a loop without + // dropping the wq lock and cleaning up the turnstile state. + ksyn_wqunlock(kwq); + pthread_kern->psynch_wait_cleanup(); + _kwq_cleanup_old_owner(&old_owner); + ksyn_wqlock(kwq); goto redrive; } } } else { - int prepost = 0; + bool prepost = false; if (kwq->kw_inqueue == 0) { // No waiters in the queue. 
- prepost = 1; + prepost = true; } else { - uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum & PTHRW_COUNT_MASK); + uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum & PTHRW_COUNT_MASK); if (low_writer == nextgen) { /* next seq to be granted found */ /* since the grant could be cv, make sure mutex wait is set incase the thread interrupted out */ - ret = ksyn_mtxsignal(kwq, NULL, updatebits | PTH_RWL_MTX_WAIT); + ret = ksyn_mtxsignal(kwq, NULL, + updatebits | PTH_RWL_MTX_WAIT, &old_owner); if (ret == KERN_NOT_WAITING) { /* interrupt post */ - kwq->kw_pre_intrcount = 1; - kwq->kw_pre_intrseq = nextgen; - kwq->kw_pre_intrretbits = updatebits; - kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE; + _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1, + nextgen, updatebits); } - } else if (is_seqhigher(low_writer, nextgen)) { - prepost = 1; + prepost = true; } else { //__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one is queue\n"); ksyn_waitq_element_t kwe; - kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen); + kwe = ksyn_queue_find_seq(kwq, + &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], nextgen); if (kwe != NULL) { /* next seq to be granted found */ /* since the grant could be cv, make sure mutex wait is set incase the thread interrupted out */ - ret = ksyn_mtxsignal(kwq, kwe, updatebits | PTH_RWL_MTX_WAIT); + ret = ksyn_mtxsignal(kwq, kwe, + updatebits | PTH_RWL_MTX_WAIT, &old_owner); if (ret == KERN_NOT_WAITING) { goto redrive; } } else { - prepost = 1; + prepost = true; } } } if (prepost) { - ksyn_mtx_drop_qos_override(kwq); - kwq->kw_owner = 0; - if (++kwq->kw_pre_rwwc > 1) { + if (kwq->kw_prepost.count != 0) { __FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n"); } else { - kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK); + _kwq_mark_preposted_wakeup(kwq, 1, nextgen & PTHRW_COUNT_MASK, + 0); } + old_owner = _kwq_clear_owner(kwq); + pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL, + &kwq->kw_turnstile); } } - + ksyn_wqunlock(kwq); + pthread_kern->psynch_wait_cleanup(); + _kwq_cleanup_old_owner(&old_owner); ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP); return returnbits; } @@ -460,354 +650,216 @@ _ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval) return res; } -static int -_ksyn_handle_missed_wakeups(ksyn_wait_queue_t kwq, - uint32_t type, - uint32_t lockseq, - uint32_t *retval) -{ - int res = 0; - if (kwq->kw_pre_intrcount != 0 && - kwq->kw_pre_intrtype == type && - (kwq->kw_pre_intrseq == 0 || is_seqlower_eq(lockseq, kwq->kw_pre_intrseq))) { - kwq->kw_pre_intrcount--; - *retval = kwq->kw_pre_intrretbits; - if (kwq->kw_pre_intrcount == 0) { - CLEAR_INTR_PREPOST_BITS(kwq); - } - res = 1; - } - return res; -} - -static int -_ksyn_handle_overlap(ksyn_wait_queue_t kwq, - uint32_t lgenval, - uint32_t rw_wc, - uint32_t *retval) +/* + * psynch_mutexwait: This system call is used for contended psynch mutexes to + * block. 
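The drop path above and the wait path that follows pair up through the kw_prepost / kw_intr bookkeeping: an unlock that finds nobody queued records a prepost, a wakeup that cannot be delivered records an interrupted wakeup, and the late-arriving waiter consumes whichever record covers its sequence number. A minimal sketch with hypothetical sketch_* names and simplified fields, ignoring the sequence-number wraparound that is_seqlower_eq handles in the real code:

#include <stdbool.h>
#include <stdint.h>

struct kwq_sketch {
	struct { uint32_t count, lseq; } prepost;            /* cf. kw_prepost */
	struct { uint32_t count, seq, returnbits; } intr;    /* cf. kw_intr */
};

/* Drop side: no waiter in the queue, so leave a note for the next one. */
static void
sketch_record_prepost(struct kwq_sketch *kwq, uint32_t lseq)
{
	kwq->prepost.count++;
	kwq->prepost.lseq = lseq;
}

/* Wait side: consume a matching prepost instead of blocking. */
static bool
sketch_consume_prepost(struct kwq_sketch *kwq, uint32_t lseq)
{
	if (kwq->prepost.count == 0 || lseq > kwq->prepost.lseq) {
		return false;              /* nothing preposted for this waiter */
	}
	kwq->prepost.count--;
	return true;                       /* caller returns to userspace holding the lock */
}

/* Wait side: consume a wakeup that raced with an interrupted waiter. */
static bool
sketch_consume_interrupted(struct kwq_sketch *kwq, uint32_t lseq,
    uint32_t *returnbits)
{
	if (kwq->intr.count == 0 || (kwq->intr.seq && lseq > kwq->intr.seq)) {
		return false;
	}
	kwq->intr.count--;
	*returnbits = kwq->intr.returnbits;
	return true;
}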
+ */ +int +_psynch_mutexwait(__unused proc_t p, user_addr_t mutex, uint32_t mgen, + uint32_t ugen, uint64_t tid, uint32_t flags, uint32_t *retval) { - int res = 0; - - // check for overlap and no pending W bit (indicates writers) - if (kwq->kw_overlapwatch != 0 && - (rw_wc & PTHRW_RWS_SAVEMASK) == 0 && - (lgenval & PTH_RWL_WBIT) == 0) { - /* overlap is set, so no need to check for valid state for overlap */ + ksyn_wait_queue_t kwq; + int error = 0; + int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) + == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; + int ins_flags = SEQFIT; + uint32_t lseq = (mgen & PTHRW_COUNT_MASK); + uint32_t updatebits = 0; + thread_t tid_th = THREAD_NULL, old_owner = THREAD_NULL; - if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) { - /* increase the next expected seq by one */ - kwq->kw_nextseqword += PTHRW_INC; - /* set count by one & bits from the nextseq and add M bit */ - *retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT); - res = 1; - } + if (firstfit) { + /* first fit */ + ins_flags = FIRSTFIT; } - return res; -} -static int -_ksyn_handle_prepost(ksyn_wait_queue_t kwq, - uint32_t type, - uint32_t lockseq, - uint32_t *retval) -{ - int res = 0; - if (kwq->kw_pre_rwwc != 0 && is_seqlower_eq(lockseq, kwq->kw_pre_lockseq)) { - kwq->kw_pre_rwwc--; - if (kwq->kw_pre_rwwc == 0) { - uint32_t preseq = kwq->kw_pre_lockseq; - uint32_t prerw_wc = kwq->kw_pre_sseq; - CLEAR_PREPOST_BITS(kwq); - if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){ - kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED; - } + error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, + (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX), &kwq); + if (error != 0) { + return error; + } - int error, block; - uint32_t updatebits; - error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (type|KW_UNLOCK_PREPOST), &block, lockseq); - if (error != 0) { - panic("kwq_handle_unlock failed %d\n", error); - } +again: + ksyn_wqlock(kwq); - if (block == 0) { - *retval = updatebits; - res = 1; - } - } + if (_kwq_handle_interrupted_wakeup(kwq, KWQ_INTR_WRITE, lseq, retval)) { + old_owner = _kwq_set_owner(kwq, current_thread(), 0); + pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner, + &kwq->kw_turnstile); + ksyn_wqunlock(kwq); + _kwq_cleanup_old_owner(&old_owner); + goto out; } - return res; -} -/* Helpers for QoS override management. Only applies to mutexes */ -static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t kwq, uint64_t tid, boolean_t prepost) -{ - if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) { - boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE; - int waiter_qos = pthread_kern->proc_usynch_get_requested_thread_qos(current_uthread()); - - kwq->kw_qos_override = MAX(waiter_qos, kwq->kw_qos_override); - - if (prepost && kwq->kw_inqueue == 0) { - // if there are no more waiters in the queue after the new (prepost-receiving) owner, we do not set an - // override, because the receiving owner may not re-enter the kernel to signal someone else if it is - // the last one to unlock. 
If other waiters end up entering the kernel, they will boost the owner - tid = 0; - } - - if (tid != 0) { - if ((tid == kwq->kw_owner) && (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED)) { - // hint continues to be accurate, and a boost was already applied - pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, FALSE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); - } else { - // either hint did not match previous owner, or hint was accurate but mutex was not contended enough for a boost previously - boolean_t boostsucceded; - - boostsucceded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); - - if (boostsucceded) { - kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED; - } - - if (wasboosted && (tid != kwq->kw_owner) && (kwq->kw_owner != 0)) { - // the hint did not match the previous owner, so drop overrides - PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0); - pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); - } - } - } else { - // new hint tells us that we don't know the owner, so drop any existing overrides - kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED; - kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED; - - if (wasboosted && (kwq->kw_owner != 0)) { - // the hint did not match the previous owner, so drop overrides - PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0); - pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); + if (kwq->kw_prepost.count && (firstfit || (lseq == kwq->kw_prepost.lseq))) { + /* got preposted lock */ + kwq->kw_prepost.count--; + + if (!firstfit) { + if (kwq->kw_prepost.count > 0) { + __FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n"); + kwq->kw_prepost.lseq += PTHRW_INC; /* look for next one */ + ksyn_wqunlock(kwq); + error = EINVAL; + goto out; } + _kwq_clear_preposted_wakeup(kwq); } - } -} -static boolean_t -ksyn_mtx_transfer_qos_override_begin(ksyn_wait_queue_t kwq, - ksyn_waitq_element_t kwe, uint64_t *kw_owner) -{ - boolean_t needs_commit = FALSE; - if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) { - boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE; - - if (kwq->kw_inqueue > 1) { - boolean_t boostsucceeded; - - // More than one waiter, so resource will still be contended after handing off ownership - boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), kwe->kwe_uth, 0, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); - - if (boostsucceeded) { - kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED; - } + if (kwq->kw_inqueue == 0) { + updatebits = lseq | (PTH_RWL_KBIT | PTH_RWL_EBIT); } else { - // kw_inqueue == 1 to get to this point, which means there will be no contention after this point - kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED; - kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED; - } - - // Remove the override that was applied to kw_owner. 
There may have been a race, - // in which case it may not match the current thread - if (wasboosted) { - if (kwq->kw_owner == 0) { - PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0); - } else if (thread_tid(current_thread()) != kwq->kw_owner) { - PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0); - *kw_owner = kwq->kw_owner; - needs_commit = TRUE; - } else { - *kw_owner = 0; - needs_commit = TRUE; - } + updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | + (PTH_RWL_KBIT | PTH_RWL_EBIT); } - } - return needs_commit; -} - -static void -ksyn_mtx_transfer_qos_override_commit(ksyn_wait_queue_t kwq, uint64_t kw_owner) -{ - struct uthread *uthread = kw_owner ? NULL : current_uthread(); - - pthread_kern->proc_usynch_thread_qos_remove_override_for_resource( - current_task(), uthread, kw_owner, kwq->kw_addr, - THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); -} + updatebits &= ~PTH_RWL_MTX_WAIT; -static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t kwq) -{ - if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) { - boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE; - - // assume nobody else in queue if this routine was called - kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED; - kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED; - - // Remove the override that was applied to kw_owner. There may have been a race, - // in which case it may not match the current thread - if (wasboosted) { - if (kwq->kw_owner == 0) { - PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0); - } else if (thread_tid(current_thread()) != kwq->kw_owner) { - PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0); - pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); - } else { - pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), current_uthread(), 0, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX); - } + if (updatebits == 0) { + __FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT \n"); } - } -} -/* - * psynch_mutexwait: This system call is used for contended psynch mutexes to block. - */ + PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr, + kwq->kw_prepost.lseq, kwq->kw_prepost.count, 1); -int -_psynch_mutexwait(__unused proc_t p, - user_addr_t mutex, - uint32_t mgen, - uint32_t ugen, - uint64_t tid, - uint32_t flags, - uint32_t *retval) -{ - ksyn_wait_queue_t kwq; - int error=0; - int ins_flags; - - int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT; - uint32_t updatebits = 0; - - uint32_t lockseq = (mgen & PTHRW_COUNT_MASK); - - if (firstfit == 0) { - ins_flags = SEQFIT; - } else { - /* first fit */ - ins_flags = FIRSTFIT; - } - - error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX), &kwq); - if (error != 0) { - return(error); + old_owner = _kwq_set_owner(kwq, current_thread(), 0); + pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner, + &kwq->kw_turnstile); + + ksyn_wqunlock(kwq); + _kwq_cleanup_old_owner(&old_owner); + *retval = updatebits; + goto out; } - - ksyn_wqlock(kwq); - // mutexwait passes in an owner hint at the time userspace contended for the mutex, however, the - // owner tid in the userspace data structure may be unset or SWITCHING (-1), or it may correspond - // to a stale snapshot after the lock has subsequently been unlocked by another thread. 
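/*
 * [Editorial aside, not part of the patch] The owner-hint handling that
 * follows can be read as a small decision table: the tid passed up from
 * userspace is only trusted when it is non-zero, is not the SWITCHING
 * sentinel, and was not read against an unlock sequence older than the last
 * unlock the kernel has already processed. The helper below is a hypothetical
 * restatement of that policy for illustration; resolve_owner_hint() and its
 * arguments are invented names, not symbols from this change.
 */
#include <stdint.h>
#include <stdbool.h>

#define TID_SWITCHING ((uint64_t)-1)    /* mirrors PTHREAD_MTX_TID_SWITCHING */

enum hint_action {
	HINT_AGREES,        /* userspace and kernel already agree         */
	USE_KERNEL_OWNER,   /* ignore the hint, keep what the kernel has  */
	TRUST_HINT          /* resolve the hint to a thread and record it */
};

static enum hint_action
resolve_owner_hint(uint64_t hint, uint64_t kernel_owner, bool ugen_is_stale)
{
	if (hint == kernel_owner) {
		return HINT_AGREES;
	}
	if (hint == 0 || hint == TID_SWITCHING || ugen_is_stale) {
		return USE_KERNEL_OWNER;
	}
	return TRUST_HINT;
}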
- if (tid == 0) { + // mutexwait passes in an owner hint at the time userspace contended for + // the mutex, however, the owner tid in the userspace data structure may be + // unset or SWITCHING (-1), or it may correspond to a stale snapshot after + // the lock has subsequently been unlocked by another thread. + if (tid == thread_tid(kwq->kw_owner)) { + // userspace and kernel agree + } else if (tid == 0) { // contender came in before owner could write TID - tid = 0; - } else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && is_seqlower(ugen, kwq->kw_lastunlockseq)) { - // owner is stale, someone has come in and unlocked since this contended read the TID, so - // assume what is known in the kernel is accurate - tid = kwq->kw_owner; + // let's assume that what the kernel knows is accurate + // for all we know this waiter came in late in the kernel + } else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && + is_seqlower(ugen, kwq->kw_lastunlockseq)) { + // owner is stale, someone has come in and unlocked since this + // contended read the TID, so assume what is known in the kernel is + // accurate } else if (tid == PTHREAD_MTX_TID_SWITCHING) { - // userspace didn't know the owner because it was being unlocked, but that unlocker hasn't - // reached the kernel yet. So assume what is known in the kernel is accurate - tid = kwq->kw_owner; + // userspace didn't know the owner because it was being unlocked, but + // that unlocker hasn't reached the kernel yet. So assume what is known + // in the kernel is accurate } else { - // hint is being passed in for a specific thread, and we have no reason not to trust - // it (like the kernel unlock sequence being higher - } - - - if (_ksyn_handle_missed_wakeups(kwq, PTH_RW_TYPE_WRITE, lockseq, retval)) { - ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE); - kwq->kw_owner = thread_tid(current_thread()); - - ksyn_wqunlock(kwq); - goto out; - } - - if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK) ))) { - /* got preposted lock */ - kwq->kw_pre_rwwc--; - if (kwq->kw_pre_rwwc == 0) { - CLEAR_PREPOST_BITS(kwq); - if (kwq->kw_inqueue == 0) { - updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT); - } else { - updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT); - } - updatebits &= ~PTH_RWL_MTX_WAIT; - - if (updatebits == 0) { - __FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT \n"); - } - - ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE); - kwq->kw_owner = thread_tid(current_thread()); - - ksyn_wqunlock(kwq); - *retval = updatebits; - goto out; - } else { - __FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n"); - kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */ + // hint is being passed in for a specific thread, and we have no reason + // not to trust it (like the kernel unlock sequence being higher) + // + // So resolve the hint to a thread_t if we haven't done so yet + // and redrive as we dropped the lock + if (tid_th == THREAD_NULL) { ksyn_wqunlock(kwq); - error = EINVAL; - goto out; + tid_th = pthread_kern->task_findtid(current_task(), tid); + if (tid_th == THREAD_NULL) tid = 0; + goto again; } + tid_th = _kwq_set_owner(kwq, tid_th, KWQ_SET_OWNER_TRANSFER_REF); } - - ksyn_mtx_update_owner_qos_override(kwq, tid, FALSE); - kwq->kw_owner = tid; - error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, mgen, ins_flags, 0, psynch_mtxcontinue, 
kThreadWaitPThreadMutex); + if (tid_th) { + // We are on our way to block, and can't drop the spinlock anymore + pthread_kern->thread_deallocate_safe(tid_th); + tid_th = THREAD_NULL; + } + error = ksyn_wait(kwq, KSYN_QUEUE_WRITE, mgen, ins_flags, 0, 0, + psynch_mtxcontinue, kThreadWaitPThreadMutex); // ksyn_wait drops wait queue lock out: - ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX)); + pthread_kern->psynch_wait_cleanup(); + ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX)); + if (tid_th) { + thread_deallocate(tid_th); + } return error; } -void +void __dead2 psynch_mtxcontinue(void *parameter, wait_result_t result) { uthread_t uth = current_uthread(); ksyn_wait_queue_t kwq = parameter; ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); - + + ksyn_wqlock(kwq); + int error = _wait_result_to_errno(result); if (error != 0) { - ksyn_wqlock(kwq); if (kwe->kwe_kwqqueue) { - ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe); + ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe); } - ksyn_wqunlock(kwq); } else { uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT; pthread_kern->uthread_set_returnval(uth, updatebits); - - if (updatebits == 0) + + if (updatebits == 0) { __FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT \n"); + } } - ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX)); + + pthread_kern->psynch_wait_complete(kwq, &kwq->kw_turnstile); + + ksyn_wqunlock(kwq); + pthread_kern->psynch_wait_cleanup(); + ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX)); pthread_kern->unix_syscall_return(error); + __builtin_unreachable(); +} + +static void __dead2 +_psynch_rw_continue(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, + wait_result_t result) +{ + uthread_t uth = current_uthread(); + ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); + + ksyn_wqlock(kwq); + + int error = _wait_result_to_errno(result); + if (error != 0) { + if (kwe->kwe_kwqqueue) { + ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe); + } + } else { + pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval); + } + + ksyn_wqunlock(kwq); + ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK)); + + pthread_kern->unix_syscall_return(error); + __builtin_unreachable(); +} + +void __dead2 +psynch_rw_rdcontinue(void *parameter, wait_result_t result) +{ + _psynch_rw_continue(parameter, KSYN_QUEUE_READ, result); +} + +void __dead2 +psynch_rw_wrcontinue(void *parameter, wait_result_t result) +{ + _psynch_rw_continue(parameter, KSYN_QUEUE_WRITE, result); } /* * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes. 
*/ int -_psynch_mutexdrop(__unused proc_t p, - user_addr_t mutex, - uint32_t mgen, - uint32_t ugen, - uint64_t tid __unused, - uint32_t flags, - uint32_t *retval) +_psynch_mutexdrop(__unused proc_t p, user_addr_t mutex, uint32_t mgen, + uint32_t ugen, uint64_t tid __unused, uint32_t flags, uint32_t *retval) { int res; ksyn_wait_queue_t kwq; - + res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq); if (res == 0) { uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags); @@ -821,65 +873,57 @@ _psynch_mutexdrop(__unused proc_t p, } static kern_return_t -ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t updateval) +ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, + uint32_t updateval, thread_t *old_owner) { kern_return_t ret; - boolean_t needs_commit; - uint64_t kw_owner; if (!kwe) { - kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_kwelist); + kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_kwelist); if (!kwe) { panic("ksyn_mtxsignal: panic signaling empty queue"); } } - needs_commit = ksyn_mtx_transfer_qos_override_begin(kwq, kwe, &kw_owner); - kwq->kw_owner = kwe->kwe_tid; - - ret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, kwe, updateval); + PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_START, kwq->kw_addr, kwe, + thread_tid(kwe->kwe_thread), kwq->kw_inqueue); - // if waking the new owner failed, remove any overrides - if (ret != KERN_SUCCESS) { - ksyn_mtx_drop_qos_override(kwq); - kwq->kw_owner = 0; - } else if (needs_commit) { - ksyn_mtx_transfer_qos_override_commit(kwq, kw_owner); + ret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, kwe, updateval); + if (ret == KERN_SUCCESS) { + *old_owner = _kwq_set_owner(kwq, kwe->kwe_thread, 0); + } else { + *old_owner = _kwq_clear_owner(kwq); } + PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_END, kwq->kw_addr, kwe, + ret, 0); return ret; } static void -ksyn_prepost(ksyn_wait_queue_t kwq, - ksyn_waitq_element_t kwe, - uint32_t state, +ksyn_prepost(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t state, uint32_t lockseq) { bzero(kwe, sizeof(*kwe)); kwe->kwe_state = state; kwe->kwe_lockseq = lockseq; kwe->kwe_count = 1; - - (void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITER, kwe, lockseq, SEQFIT); + + (void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITE, kwe, lockseq, SEQFIT); kwq->kw_fakecount++; } static void -ksyn_cvsignal(ksyn_wait_queue_t ckwq, - thread_t th, - uint32_t uptoseq, - uint32_t signalseq, - uint32_t *updatebits, - int *broadcast, - ksyn_waitq_element_t *nkwep) +ksyn_cvsignal(ksyn_wait_queue_t ckwq, thread_t th, uint32_t uptoseq, + uint32_t signalseq, uint32_t *updatebits, int *broadcast, + ksyn_waitq_element_t *nkwep) { ksyn_waitq_element_t kwe = NULL; ksyn_waitq_element_t nkwe = NULL; - ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER]; - + ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE]; + uptoseq &= PTHRW_COUNT_MASK; - + // Find the specified thread to wake. if (th != THREAD_NULL) { uthread_t uth = pthread_kern->get_bsdthread_info(th); @@ -893,7 +937,7 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq, return; } } - + // If no thread was specified, find any thread to wake (with the right // sequence number). while (th == THREAD_NULL) { @@ -906,13 +950,13 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq, // reacquiring the lock after allocation in // case anything new shows up. 
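/*
 * [Editorial aside, not part of the patch] The comment above describes a
 * standard "allocate outside the lock, then re-scan" shape: the wait-queue
 * lock is a spinlock, so the zone allocator cannot be called while it is
 * held, and once the lock has been dropped the queue must be re-examined
 * because a waiter may have arrived or left in the window. A stripped-down
 * sketch of that control flow, with invented helpers standing in for
 * ksyn_wqlock()/ksyn_wqunlock()/zalloc():
 */
struct waiter;
struct waiter *find_waiter(void);       /* call with the queue lock held   */
struct waiter *alloc_entry(void);       /* may block; never under the lock */
void q_lock(void);
void q_unlock(void);

static struct waiter *
find_or_prepare_entry(struct waiter **sparep)
{
	struct waiter *found;

	q_lock();
	while ((found = find_waiter()) == 0 && *sparep == 0) {
		q_unlock();
		*sparep = alloc_entry();    /* allocate with the lock dropped      */
		q_lock();                   /* re-scan: the queue may have changed */
	}
	/* caller still holds the lock; it wakes `found` or records a prepost
	 * using *sparep, and frees the spare if it went unused */
	return found;
}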
ksyn_wqunlock(ckwq); - nkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone); + nkwe = (ksyn_waitq_element_t)zalloc(kwe_zone); ksyn_wqlock(ckwq); } else { break; } } - + if (kwe != NULL) { // If we found a thread to wake... if (kwe->kwe_state == KWE_THREAD_INWAIT) { @@ -928,7 +972,7 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq, */ *broadcast = 1; } else { - (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT); + (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT); *updatebits += PTHRW_INC; } } else if (kwe->kwe_state == KWE_THREAD_PREPOST) { @@ -944,7 +988,7 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq, * If we allocated a new kwe above but then found a different kwe to * use then we need to deallocate the spare one. */ - pthread_kern->zfree(kwe_zone, nkwe); + zfree(kwe_zone, nkwe); nkwe = NULL; } } else if (nkwe != NULL) { @@ -954,19 +998,14 @@ ksyn_cvsignal(ksyn_wait_queue_t ckwq, } else { panic("failed to allocate kwe\n"); } - + *nkwep = nkwe; } static int -__psynch_cvsignal(user_addr_t cv, - uint32_t cgen, - uint32_t cugen, - uint32_t csgen, - uint32_t flags, - int broadcast, - mach_port_name_t threadport, - uint32_t *retval) +__psynch_cvsignal(user_addr_t cv, uint32_t cgen, uint32_t cugen, + uint32_t csgen, uint32_t flags, int broadcast, + mach_port_name_t threadport, uint32_t *retval) { int error = 0; thread_t th = THREAD_NULL; @@ -997,11 +1036,16 @@ __psynch_cvsignal(user_addr_t cv, // update L, U and S... UPDATE_CVKWQ(kwq, cgen, cugen, csgen); - + + PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_START, kwq->kw_addr, + fromseq, uptoseq, broadcast); + if (!broadcast) { // No need to signal if the CV is already balanced. if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) { - ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, &broadcast, &nkwe); + ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, + &broadcast, &nkwe); + PTHREAD_TRACE(psynch_cvar_signal, kwq->kw_addr, broadcast, 0,0); } } @@ -1013,11 +1057,16 @@ __psynch_cvsignal(user_addr_t cv, // set C or P bits and free if needed ksyn_cvupdate_fixup(kwq, &updatebits); *retval = updatebits; + + PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_END, kwq->kw_addr, + updatebits, 0, 0); ksyn_wqunlock(kwq); + + pthread_kern->psynch_wait_cleanup(); if (nkwe != NULL) { - pthread_kern->zfree(kwe_zone, nkwe); + zfree(kwe_zone, nkwe); } ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR)); @@ -1034,15 +1083,9 @@ __psynch_cvsignal(user_addr_t cv, * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars. */ int -_psynch_cvbroad(__unused proc_t p, - user_addr_t cv, - uint64_t cvlsgen, - uint64_t cvudgen, - uint32_t flags, - __unused user_addr_t mutex, - __unused uint64_t mugen, - __unused uint64_t tid, - uint32_t *retval) +_psynch_cvbroad(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen, + uint64_t cvudgen, uint32_t flags, __unused user_addr_t mutex, + __unused uint64_t mugen, __unused uint64_t tid, uint32_t *retval) { uint32_t diffgen = cvudgen & 0xffffffff; uint32_t count = diffgen >> PTHRW_COUNT_SHIFT; @@ -1062,15 +1105,9 @@ _psynch_cvbroad(__unused proc_t p, * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars. 
*/ int -_psynch_cvsignal(__unused proc_t p, - user_addr_t cv, - uint64_t cvlsgen, - uint32_t cvugen, - int threadport, - __unused user_addr_t mutex, - __unused uint64_t mugen, - __unused uint64_t tid, - uint32_t flags, +_psynch_cvsignal(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen, + uint32_t cvugen, int threadport, __unused user_addr_t mutex, + __unused uint64_t mugen, __unused uint64_t tid, uint32_t flags, uint32_t *retval) { uint32_t csgen = (cvlsgen >> 32) & 0xffffffff; @@ -1083,16 +1120,9 @@ _psynch_cvsignal(__unused proc_t p, * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel. */ int -_psynch_cvwait(__unused proc_t p, - user_addr_t cv, - uint64_t cvlsgen, - uint32_t cvugen, - user_addr_t mutex, - uint64_t mugen, - uint32_t flags, - int64_t sec, - uint32_t nsec, - uint32_t *retval) +_psynch_cvwait(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen, + uint32_t cvugen, user_addr_t mutex, uint64_t mugen, uint32_t flags, + int64_t sec, uint32_t nsec, uint32_t *retval) { int error = 0; uint32_t updatebits = 0; @@ -1118,6 +1148,8 @@ _psynch_cvwait(__unused proc_t p, __FAILEDUSERTEST__("psync_cvwait; invalid sequence numbers\n"); return EINVAL; } + + PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_START, cv, mutex, cgen, 0); error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq); if (error != 0) { @@ -1125,7 +1157,8 @@ _psynch_cvwait(__unused proc_t p, } if (mutex != 0) { - error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, NULL); + uint32_t mutexrv = 0; + error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, &mutexrv); if (error != 0) { goto out; } @@ -1137,7 +1170,7 @@ _psynch_cvwait(__unused proc_t p, UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen); /* Look for the sequence for prepost (or conflicting thread */ - ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER]; + ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE]; kwe = ksyn_queue_find_cvpreposeq(kq, lockseq); if (kwe != NULL) { if (kwe->kwe_state == KWE_THREAD_PREPOST) { @@ -1171,7 +1204,7 @@ _psynch_cvwait(__unused proc_t p, } if (error == 0) { - updatebits = PTHRW_INC; + updatebits |= PTHRW_INC; ckwq->kw_sword += PTHRW_INC; /* set C or P bits and free if needed */ @@ -1180,45 +1213,54 @@ _psynch_cvwait(__unused proc_t p, } } else { uint64_t abstime = 0; + uint16_t kwe_flags = 0; if (sec != 0 || (nsec & 0x3fffffff) != 0) { struct timespec ts; ts.tv_sec = (__darwin_time_t)sec; ts.tv_nsec = (nsec & 0x3fffffff); - nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime); + nanoseconds_to_absolutetime( + (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime); clock_absolutetime_interval_to_deadline(abstime, &abstime); } + + PTHREAD_TRACE(psynch_cvar_kwait, cv, mutex, kwe_flags, 1); - error = ksyn_wait(ckwq, KSYN_QUEUE_WRITER, cgen, SEQFIT, abstime, psynch_cvcontinue, kThreadWaitPThreadCondVar); + error = ksyn_wait(ckwq, KSYN_QUEUE_WRITE, cgen, SEQFIT, abstime, + kwe_flags, psynch_cvcontinue, kThreadWaitPThreadCondVar); // ksyn_wait drops wait queue lock } ksyn_wqunlock(ckwq); - + if (nkwe != NULL) { - pthread_kern->zfree(kwe_zone, nkwe); + zfree(kwe_zone, nkwe); } out: + + PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, cv, error, updatebits, 2); + ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR)); return error; } -void +void __dead2 psynch_cvcontinue(void *parameter, wait_result_t result) { uthread_t uth = current_uthread(); ksyn_wait_queue_t ckwq = parameter; ksyn_waitq_element_t kwe = 
pthread_kern->uthread_get_uukwe(uth); - + int error = _wait_result_to_errno(result); if (error != 0) { ksyn_wqlock(ckwq); /* just in case it got woken up as we were granting */ - pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval); + int retval = kwe->kwe_psynchretval; + pthread_kern->uthread_set_returnval(uth, retval); if (kwe->kwe_kwqqueue) { - ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe); + ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe); } if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) { /* the condition var granted. @@ -1231,46 +1273,48 @@ psynch_cvcontinue(void *parameter, wait_result_t result) /* set C and P bits, in the local error */ if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) { - error |= ECVCERORR; + PTHREAD_TRACE(psynch_cvar_zeroed, ckwq->kw_addr, + ckwq->kw_lword, ckwq->kw_sword, ckwq->kw_inqueue); + error |= ECVCLEARED; if (ckwq->kw_inqueue != 0) { - ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 1); + ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 1); } ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0; ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT; } else { /* everythig in the queue is a fake entry ? */ if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) { - error |= ECVPERORR; + error |= ECVPREPOST; } } } ksyn_wqunlock(ckwq); + + PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr, + error, 0, 3); } else { int val = 0; // PTH_RWL_MTX_WAIT is removed if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) { val = PTHRW_INC | PTH_RWS_CV_CBIT; } + PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr, + val, 0, 4); pthread_kern->uthread_set_returnval(uth, val); } ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR)); pthread_kern->unix_syscall_return(error); + __builtin_unreachable(); } /* * psynch_cvclrprepost: This system call clears pending prepost if present. */ int -_psynch_cvclrprepost(__unused proc_t p, - user_addr_t cv, - uint32_t cvgen, - uint32_t cvugen, - uint32_t cvsgen, - __unused uint32_t prepocnt, - uint32_t preposeq, - uint32_t flags, - int *retval) +_psynch_cvclrprepost(__unused proc_t p, user_addr_t cv, uint32_t cvgen, + uint32_t cvugen, uint32_t cvsgen, __unused uint32_t prepocnt, + uint32_t preposeq, uint32_t flags, int *retval) { int error = 0; int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX); @@ -1279,7 +1323,8 @@ _psynch_cvclrprepost(__unused proc_t p, *retval = 0; - error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype, &kwq); + error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 
0 : cvsgen, flags, wqtype, + &kwq); if (error != 0) { return error; } @@ -1287,16 +1332,19 @@ _psynch_cvclrprepost(__unused proc_t p, ksyn_wqlock(kwq); if (mutex) { - int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT; - if (firstfit && kwq->kw_pre_rwwc != 0) { - if (is_seqlower_eq(kwq->kw_pre_lockseq, cvgen)) { - // clear prepost - kwq->kw_pre_rwwc = 0; - kwq->kw_pre_lockseq = 0; + int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) + == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; + if (firstfit && kwq->kw_prepost.count) { + if (is_seqlower_eq(kwq->kw_prepost.lseq, cvgen)) { + PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr, + kwq->kw_prepost.lseq, 0, 2); + _kwq_clear_preposted_wakeup(kwq); } } } else { - ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITER, preposeq, 0); + PTHREAD_TRACE(psynch_cvar_clrprepost, kwq->kw_addr, wqtype, + preposeq, 0); + ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITE, preposeq, 0); } ksyn_wqunlock(kwq); @@ -1307,50 +1355,47 @@ _psynch_cvclrprepost(__unused proc_t p, /* ***************** pthread_rwlock ************************ */ static int -__psynch_rw_lock(int type, - user_addr_t rwlock, - uint32_t lgenval, - uint32_t ugenval, - uint32_t rw_wc, - int flags, - uint32_t *retval) +__psynch_rw_lock(int type, user_addr_t rwlock, uint32_t lgenval, + uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval) { - int prepost_type, kqi; + uint32_t lockseq = lgenval & PTHRW_COUNT_MASK; + ksyn_wait_queue_t kwq; + int error, prepost_type, kqi; + thread_continue_t tc; if (type == PTH_RW_TYPE_READ) { prepost_type = KW_UNLOCK_PREPOST_READLOCK; kqi = KSYN_QUEUE_READ; + tc = psynch_rw_rdcontinue; } else { prepost_type = KW_UNLOCK_PREPOST_WRLOCK; - kqi = KSYN_QUEUE_WRITER; + kqi = KSYN_QUEUE_WRITE; + tc = psynch_rw_wrcontinue; } - uint32_t lockseq = lgenval & PTHRW_COUNT_MASK; + error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, + (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq); + if (error != 0) { + return error; + } - int error; - ksyn_wait_queue_t kwq; - error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq); - if (error == 0) { - ksyn_wqlock(kwq); - _ksyn_check_init(kwq, lgenval); - if (_ksyn_handle_missed_wakeups(kwq, type, lockseq, retval) || - // handle overlap first as they are not counted against pre_rwwc - (type == PTH_RW_TYPE_READ && _ksyn_handle_overlap(kwq, lgenval, rw_wc, retval)) || - _ksyn_handle_prepost(kwq, prepost_type, lockseq, retval)) { - ksyn_wqunlock(kwq); - } else { - block_hint_t block_hint = type == PTH_RW_TYPE_READ ? - kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite; - error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, THREAD_CONTINUE_NULL, block_hint); - // ksyn_wait drops wait queue lock - if (error == 0) { - uthread_t uth = current_uthread(); - ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); - *retval = kwe->kwe_psynchretval; - } - } - ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK)); + ksyn_wqlock(kwq); + _ksyn_check_init(kwq, lgenval); + if (_kwq_handle_interrupted_wakeup(kwq, type, lockseq, retval) || + // handle overlap first as they are not counted against pre_rwwc + // handle_overlap uses the flags in lgenval (vs. lockseq) + _kwq_handle_overlap(kwq, type, lgenval, rw_wc, retval) || + _kwq_handle_preposted_wakeup(kwq, prepost_type, lockseq, retval)) { + ksyn_wqunlock(kwq); + goto out; } + + block_hint_t block_hint = type == PTH_RW_TYPE_READ ? 
+ kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite; + error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, 0, tc, block_hint); + // ksyn_wait drops wait queue lock +out: + ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK)); return error; } @@ -1358,28 +1403,20 @@ __psynch_rw_lock(int type, * psynch_rw_rdlock: This system call is used for psync rwlock readers to block. */ int -_psynch_rw_rdlock(__unused proc_t p, - user_addr_t rwlock, - uint32_t lgenval, - uint32_t ugenval, - uint32_t rw_wc, - int flags, - uint32_t *retval) +_psynch_rw_rdlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval, + uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval) { - return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, flags, retval); + return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, + flags, retval); } /* * psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block. */ int -_psynch_rw_longrdlock(__unused proc_t p, - __unused user_addr_t rwlock, - __unused uint32_t lgenval, - __unused uint32_t ugenval, - __unused uint32_t rw_wc, - __unused int flags, - __unused uint32_t *retval) +_psynch_rw_longrdlock(__unused proc_t p, __unused user_addr_t rwlock, + __unused uint32_t lgenval, __unused uint32_t ugenval, + __unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval) { return ESRCH; } @@ -1389,28 +1426,20 @@ _psynch_rw_longrdlock(__unused proc_t p, * psynch_rw_wrlock: This system call is used for psync rwlock writers to block. */ int -_psynch_rw_wrlock(__unused proc_t p, - user_addr_t rwlock, - uint32_t lgenval, - uint32_t ugenval, - uint32_t rw_wc, - int flags, - uint32_t *retval) +_psynch_rw_wrlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval, + uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval) { - return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, rw_wc, flags, retval); + return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, + rw_wc, flags, retval); } /* * psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block. */ int -_psynch_rw_yieldwrlock(__unused proc_t p, - __unused user_addr_t rwlock, - __unused uint32_t lgenval, - __unused uint32_t ugenval, - __unused uint32_t rw_wc, - __unused int flags, - __unused uint32_t *retval) +_psynch_rw_yieldwrlock(__unused proc_t p, __unused user_addr_t rwlock, + __unused uint32_t lgenval, __unused uint32_t ugenval, + __unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval) { return ESRCH; } @@ -1420,13 +1449,8 @@ _psynch_rw_yieldwrlock(__unused proc_t p, * reader/writer variety lock. 
*/ int -_psynch_rw_unlock(__unused proc_t p, - user_addr_t rwlock, - uint32_t lgenval, - uint32_t ugenval, - uint32_t rw_wc, - int flags, - uint32_t *retval) +_psynch_rw_unlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval, + uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval) { int error = 0; ksyn_wait_queue_t kwq; @@ -1436,7 +1460,8 @@ _psynch_rw_unlock(__unused proc_t p, uint32_t curgen = lgenval & PTHRW_COUNT_MASK; int clearedkflags = 0; - error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq); + error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, + (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq); if (error != 0) { return(error); } @@ -1445,7 +1470,8 @@ _psynch_rw_unlock(__unused proc_t p, int isinit = _ksyn_check_init(kwq, lgenval); /* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */ - if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugenval, kwq->kw_lastunlockseq)!= 0)) { + if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && + (is_seqlower(ugenval, kwq->kw_lastunlockseq)!= 0)) { error = 0; goto out; } @@ -1466,7 +1492,7 @@ _psynch_rw_unlock(__unused proc_t p, /* can handle unlock now */ - CLEAR_PREPOST_BITS(kwq); + _kwq_clear_preposted_wakeup(kwq); error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0); #if __TESTPANICS__ @@ -1479,26 +1505,25 @@ out: *retval = updatebits; } - // If any of the wakeups failed because they already - // returned to userspace because of a signal then we need to ensure that the - // reset state is not cleared when that thread returns. Otherwise, + // If any of the wakeups failed because they + // already returned to userspace because of a signal then we need to ensure + // that the reset state is not cleared when that thread returns. Otherwise, // _pthread_rwlock_lock will clear the interrupted state before it is read. 
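/*
 * [Editorial aside, not part of the patch] The spurious-unlock check above,
 * and the many is_seqlower()/is_seqhigher() tests throughout this file,
 * compare free-running generation counters that are expected to wrap around,
 * so a plain `<` would misorder values that straddle the wrap. The usual
 * wraparound-safe comparison looks like the sketch below; seq_is_lower() is
 * an illustrative stand-in, and the real is_seqlower() in synch_internal.h
 * may differ in detail (it operates on the masked count bits).
 */
#include <stdint.h>
#include <stdbool.h>

static bool
seq_is_lower(uint32_t x, uint32_t y)
{
	/* true when x precedes y, provided the two values are less than half
	 * the counter space apart; same idea as TCP sequence comparison */
	return (int32_t)(x - y) < 0;
}

static bool
unlock_is_spurious(uint32_t ugen, uint32_t last_unlock_seq)
{
	/* an unlock older than the newest one already processed is stale */
	return seq_is_lower(ugen, last_unlock_seq);
}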
- if (clearedkflags != 0 && kwq->kw_pre_intrcount > 0) { + if (clearedkflags != 0 && kwq->kw_intr.count > 0) { kwq->kw_kflags |= KSYN_KWF_INITCLEARED; } ksyn_wqunlock(kwq); + pthread_kern->psynch_wait_cleanup(); ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK)); return(error); prepost: /* update if the new seq is higher than prev prepost, or first set */ - if (is_rws_setseq(kwq->kw_pre_sseq) || - is_seqhigher_eq(rw_wc, kwq->kw_pre_sseq)) { - kwq->kw_pre_rwwc = (diff - count); - kwq->kw_pre_lockseq = curgen; - kwq->kw_pre_sseq = rw_wc; + if (is_rws_sbit_set(kwq->kw_prepost.sseq) || + is_seqhigher_eq(rw_wc, kwq->kw_prepost.sseq)) { + _kwq_mark_preposted_wakeup(kwq, diff - count, curgen, rw_wc); updatebits = lgenval; /* let this not do unlock handling */ } error = 0; @@ -1526,13 +1551,9 @@ _pth_proc_hashinit(proc_t p) static int -ksyn_wq_hash_lookup(user_addr_t uaddr, - proc_t p, - int flags, - ksyn_wait_queue_t *out_kwq, - struct pthhashhead **out_hashptr, - uint64_t *out_object, - uint64_t *out_offset) +ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, + ksyn_wait_queue_t *out_kwq, struct pthhashhead **out_hashptr, + uint64_t *out_object, uint64_t *out_offset) { int res = 0; ksyn_wait_queue_t kwq; @@ -1593,9 +1614,8 @@ _pth_proc_hashdelete(proc_t p) pthread_list_unlock(); /* release fake entries if present for cvars */ if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0)) - ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]); - lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp); - pthread_kern->zfree(kwq_zone, kwq); + ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE]); + _kwq_destroy(kwq); pthread_list_lock(); } } @@ -1611,14 +1631,49 @@ ksyn_freeallkwe(ksyn_queue_t kq) while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) { TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list); if (kwe->kwe_state != KWE_THREAD_INWAIT) { - pthread_kern->zfree(kwe_zone, kwe); + zfree(kwe_zone, kwe); } } } +static inline void +_kwq_report_inuse(ksyn_wait_queue_t kwq) +{ + if (kwq->kw_prepost.count != 0) { + __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [pre %d:0x%x:0x%x]", + (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_prepost.count, + kwq->kw_prepost.lseq, kwq->kw_prepost.sseq); + PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, + kwq->kw_type, 1, 0); + } + if (kwq->kw_intr.count != 0) { + __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [intr %d:0x%x:0x%x:0x%x]", + (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_intr.count, + kwq->kw_intr.type, kwq->kw_intr.seq, + kwq->kw_intr.returnbits); + PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, + kwq->kw_type, 2, 0); + } + if (kwq->kw_iocount) { + __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [ioc %d:%d]", + (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_iocount, + kwq->kw_dropcount); + PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, + kwq->kw_type, 3, 0); + } + if (kwq->kw_inqueue) { + __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [inq %d:%d]", + (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_inqueue, + kwq->kw_fakecount); + PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, kwq->kw_type, + 4, 0); + } +} + /* find kernel waitqueue, if not present create one. 
Grants a reference */ int -ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int flags, int wqtype, ksyn_wait_queue_t *kwqp) +ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, + int flags, int wqtype, ksyn_wait_queue_t *kwqp) { int res = 0; ksyn_wait_queue_t kwq = NULL; @@ -1636,7 +1691,8 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int while (res == 0) { pthread_list_lock(); - res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, &object, &offset); + res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, + &object, &offset); if (res != 0) { pthread_list_unlock(); break; @@ -1645,13 +1701,13 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int // Drop the lock to allocate a new kwq and retry. pthread_list_unlock(); - nkwq = (ksyn_wait_queue_t)pthread_kern->zalloc(kwq_zone); + nkwq = (ksyn_wait_queue_t)zalloc(kwq_zone); bzero(nkwq, sizeof(struct ksyn_wait_queue)); int i; for (i = 0; i < KSYN_QUEUE_MAX; i++) { ksyn_queue_init(&nkwq->kw_ksynqueues[i]); } - lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr); + lck_spin_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr); continue; } else if (kwq == NULL && nkwq != NULL) { // Still not found, add the new kwq to the hash. @@ -1671,21 +1727,23 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int kwq->kw_pflags &= ~KSYN_WQ_FLIST; } if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) { - if (kwq->kw_inqueue == 0 && kwq->kw_pre_rwwc == 0 && kwq->kw_pre_intrcount == 0) { + if (!_kwq_is_used(kwq)) { if (kwq->kw_iocount == 0) { kwq->kw_type = 0; // mark for reinitialization - } else if (kwq->kw_iocount == 1 && kwq->kw_dropcount == kwq->kw_iocount) { + } else if (kwq->kw_iocount == 1 && + kwq->kw_dropcount == kwq->kw_iocount) { /* if all users are unlockers then wait for it to finish */ kwq->kw_pflags |= KSYN_WQ_WAITING; // Drop the lock and wait for the kwq to be free. 
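/*
 * [Editorial aside, not part of the patch] The wait performed below is the
 * classic BSD "sleep on an address" pattern: the waiter marks the object,
 * sleeps on a channel derived from it (msleep with PDROP releases the list
 * lock atomically with going to sleep), and whoever drains the object calls
 * wakeup() on the same channel. A minimal model with invented names, where
 * obj_lock/obj_sleep/obj_wakeup stand in for pthread_list_lock/msleep/wakeup:
 */
struct shared_obj {
	int busy;       /* object still has users                    */
	int waiting;    /* someone is asleep waiting for it to drain */
};

void obj_lock(void);
void obj_unlock(void);
void obj_sleep(void *chan);     /* atomically drops the lock and sleeps */
void obj_wakeup(void *chan);

static void
wait_until_free(struct shared_obj *o)
{
	obj_lock();
	while (o->busy) {
		o->waiting = 1;
		obj_sleep(&o->waiting); /* lock is released while sleeping     */
		obj_lock();             /* re-take and re-check the condition  */
	}
	obj_unlock();
}

static void
release_obj(struct shared_obj *o)
{
	obj_lock();
	o->busy = 0;
	if (o->waiting) {
		o->waiting = 0;
		obj_wakeup(&o->waiting);
	}
	obj_unlock();
}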
- (void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0); + (void)msleep(&kwq->kw_pflags, pthread_list_mlock, + PDROP, "ksyn_wqfind", 0); continue; } else { - __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n"); + _kwq_report_inuse(kwq); res = EINVAL; } } else { - __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n"); + _kwq_report_inuse(kwq); res = EINVAL; } } @@ -1700,9 +1758,13 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int kwq->kw_lword = mgen; kwq->kw_uword = ugen; kwq->kw_sword = sgen; - kwq->kw_owner = 0; + kwq->kw_owner = THREAD_NULL; kwq->kw_kflags = 0; kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED; + PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_START, uaddr, + kwq->kw_type, kwq, 0); + PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_END, uaddr, + mgen, ugen, sgen); } kwq->kw_iocount++; if (wqtype == KSYN_WQTYPE_MUTEXDROP) { @@ -1716,8 +1778,7 @@ ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int *kwqp = kwq; } if (nkwq) { - lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp); - pthread_kern->zfree(kwq_zone, nkwq); + _kwq_destroy(nkwq); } return res; } @@ -1740,7 +1801,16 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype) wakeup(&kwq->kw_pflags); } - if (kwq->kw_pre_rwwc == 0 && kwq->kw_inqueue == 0 && kwq->kw_pre_intrcount == 0) { + if (!_kwq_is_used(kwq)) { + if (kwq->kw_turnstile) { + panic("kw_turnstile still non-null upon release"); + } + + PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_START, + kwq->kw_addr, kwq->kw_type, qfreenow, 0); + PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_END, + kwq->kw_addr, kwq->kw_lword, kwq->kw_uword, kwq->kw_sword); + if (qfreenow == 0) { microuptime(&kwq->kw_ts); LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list); @@ -1762,8 +1832,7 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype) } pthread_list_unlock(); if (free_elem != NULL) { - lck_mtx_destroy(&free_elem->kw_lock, pthread_lck_grp); - pthread_kern->zfree(kwq_zone, free_elem); + _kwq_destroy(free_elem); } } @@ -1771,7 +1840,7 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype) void psynch_wq_cleanup(__unused void *param, __unused void * param1) { - ksyn_wait_queue_t kwq; + ksyn_wait_queue_t kwq, tmp; struct timeval t; int reschedule = 0; uint64_t deadline = 0; @@ -1783,7 +1852,7 @@ psynch_wq_cleanup(__unused void *param, __unused void * param1) microuptime(&t); LIST_FOREACH(kwq, &pth_free_list, kw_list) { - if (kwq->kw_iocount != 0 || kwq->kw_pre_rwwc != 0 || kwq->kw_inqueue != 0 || kwq->kw_pre_intrcount != 0) { + if (_kwq_is_used(kwq) || kwq->kw_iocount != 0) { // still in use continue; } @@ -1810,10 +1879,8 @@ psynch_wq_cleanup(__unused void *param, __unused void * param1) } pthread_list_unlock(); - while ((kwq = LIST_FIRST(&freelist)) != NULL) { - LIST_REMOVE(kwq, kw_list); - lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp); - pthread_kern->zfree(kwq_zone, kwq); + LIST_FOREACH_SAFE(kwq, &freelist, kw_list, tmp) { + _kwq_destroy(kwq); } } @@ -1833,25 +1900,25 @@ _wait_result_to_errno(wait_result_t result) } int -ksyn_wait(ksyn_wait_queue_t kwq, - int kqi, - uint32_t lockseq, - int fit, - uint64_t abstime, - thread_continue_t continuation, - block_hint_t block_hint) +ksyn_wait(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, uint32_t lockseq, + int fit, uint64_t abstime, uint16_t kwe_flags, + thread_continue_t continuation, block_hint_t block_hint) { - int res; - thread_t 
th = current_thread(); uthread_t uth = pthread_kern->get_bsdthread_info(th); + struct turnstile **tstore = NULL; + int res; + + assert(continuation != THREAD_CONTINUE_NULL); + ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth); bzero(kwe, sizeof(*kwe)); kwe->kwe_count = 1; kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK; kwe->kwe_state = KWE_THREAD_INWAIT; kwe->kwe_uth = uth; - kwe->kwe_tid = thread_tid(th); + kwe->kwe_thread = th; + kwe->kwe_flags = kwe_flags; res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit); if (res != 0) { @@ -1859,43 +1926,39 @@ ksyn_wait(ksyn_wait_queue_t kwq, ksyn_wqunlock(kwq); return res; } - - thread_set_pending_block_hint(th, block_hint); - assert_wait_deadline_with_leeway(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, abstime, 0); + + PTHREAD_TRACE(psynch_mutex_kwqwait, kwq->kw_addr, kwq->kw_inqueue, + kwq->kw_prepost.count, kwq->kw_intr.count); + + if (_kwq_use_turnstile(kwq)) { + // pthread mutexes and rwlocks both (at least sometimes) know their + // owner and can use turnstiles. Otherwise, we pass NULL as the + // tstore to the shims so they wait on the global waitq. + tstore = &kwq->kw_turnstile; + } + + pthread_kern->psynch_wait_prepare((uintptr_t)kwq, tstore, kwq->kw_owner, + block_hint, abstime); + ksyn_wqunlock(kwq); - - kern_return_t ret; - if (continuation == THREAD_CONTINUE_NULL) { - ret = thread_block(NULL); - } else { - ret = thread_block_parameter(continuation, kwq); - - // If thread_block_parameter returns (interrupted) call the - // continuation manually to clean up. - continuation(kwq, ret); - - // NOT REACHED - panic("ksyn_wait continuation returned"); + + if (tstore) { + pthread_kern->psynch_wait_update_complete(kwq->kw_turnstile); } - res = _wait_result_to_errno(ret); - if (res != 0) { - ksyn_wqlock(kwq); - if (kwe->kwe_kwqqueue) { - ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe); - } - ksyn_wqunlock(kwq); - } - return res; + thread_block_parameter(continuation, kwq); + + // NOT REACHED + panic("ksyn_wait continuation returned"); + __builtin_unreachable(); } kern_return_t -ksyn_signal(ksyn_wait_queue_t kwq, - int kqi, - ksyn_waitq_element_t kwe, - uint32_t updateval) +ksyn_signal(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, + ksyn_waitq_element_t kwe, uint32_t updateval) { kern_return_t ret; + struct turnstile **tstore = NULL; // If no wait element was specified, wake the first. 
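/*
 * [Editorial aside, not part of the patch] ksyn_wait() above now requires a
 * continuation and never returns to its caller: thread_block_parameter()
 * discards the blocking thread's kernel stack, and when the thread is woken
 * it starts executing the continuation on a fresh stack. All post-wakeup work
 * (re-taking the queue lock, dequeueing on error, returning to userspace)
 * therefore has to live in the continuation, which is why psynch_mtxcontinue,
 * psynch_cvcontinue and _psynch_rw_continue end in unix_syscall_return(). A
 * toy version of that split, with invented primitives
 * block_with_continuation() and return_to_user():
 */
typedef void (*continuation_fn)(void *arg, int wait_result);

void block_with_continuation(continuation_fn fn, void *arg); /* assumed primitive */
void return_to_user(int error);                 /* assumed primitive; never returns */

static void
wake_continuation(void *arg, int wait_result)
{
	/* runs on a fresh stack after wakeup; `arg` identifies the wait queue */
	int error = (wait_result == 0) ? 0 : wait_result;
	/* ... relock the queue, remove ourselves on error, read the result bits ... */
	return_to_user(error);
	/* NOT REACHED */
}

static void
wait_for_event(void *waitq)
{
	/* ... enqueue the current thread on `waitq`, drop its lock ... */
	block_with_continuation(wake_continuation, waitq);
	/* NOT REACHED: execution resumes only in wake_continuation() */
}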
if (!kwe) { @@ -1912,7 +1975,12 @@ ksyn_signal(ksyn_wait_queue_t kwq, ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe); kwe->kwe_psynchretval = updateval; - ret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval); + if (_kwq_use_turnstile(kwq)) { + tstore = &kwq->kw_turnstile; + } + + ret = pthread_kern->psynch_wait_wakeup(kwq, kwe, tstore); + if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) { panic("ksyn_signal: panic waking up thread %x\n", ret); } @@ -1925,7 +1993,8 @@ ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp) kern_return_t ret; vm_page_info_basic_data_t info; mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT; - ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count); + ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, + VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count); if (ret != KERN_SUCCESS) { return EINVAL; } @@ -1943,20 +2012,22 @@ ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp) /* lowest of kw_fr, kw_flr, kw_fwr, kw_fywr */ int -kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *typep, uint32_t lowest[]) +kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, + int *typep, uint32_t lowest[]) { uint32_t kw_fr, kw_fwr, low; int type = 0, lowtype, typenum[2] = { 0 }; uint32_t numbers[2] = { 0 }; int count = 0, i; - - if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) { + if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || + ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) { type |= PTH_RWSHFT_TYPE_READ; /* read entries are present */ if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) { kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum; - if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0)) + if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && + (is_seqlower(premgen, kw_fr) != 0)) kw_fr = premgen; } else kw_fr = premgen; @@ -1968,22 +2039,24 @@ kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type } else lowest[KSYN_QUEUE_READ] = 0; - if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) { + if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) || + ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) { type |= PTH_RWSHFT_TYPE_WRITE; /* read entries are present */ - if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) { - kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum; - if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0)) + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) { + kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum; + if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && + (is_seqlower(premgen, kw_fwr) != 0)) kw_fwr = premgen; } else kw_fwr = premgen; - lowest[KSYN_QUEUE_WRITER] = kw_fwr; + lowest[KSYN_QUEUE_WRITE] = kw_fwr; numbers[count]= kw_fwr; typenum[count] = PTH_RW_TYPE_WRITE; count++; } else - lowest[KSYN_QUEUE_WRITER] = 0; + lowest[KSYN_QUEUE_WRITE] = 0; #if __TESTPANICS__ if (count == 0) @@ -2009,7 +2082,8 @@ kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type /* wakeup readers to upto the writer limits */ int -ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp) +ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, 
int allreaders, + uint32_t updatebits, int *wokenp) { ksyn_queue_t kq; int failedwakeup = 0; @@ -2020,7 +2094,8 @@ ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, ui lbits = updatebits; kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ]; - while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) { + while ((kq->ksynq_count != 0) && + (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) { kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits); if (kret == KERN_NOT_WAITING) { failedwakeup++; @@ -2034,19 +2109,17 @@ ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, ui } -/* This handles the unlock grants for next set on rw_unlock() or on arrival of all preposted waiters */ +/* + * This handles the unlock grants for next set on rw_unlock() or on arrival + * of all preposted waiters. + */ int -kwq_handle_unlock(ksyn_wait_queue_t kwq, - __unused uint32_t mgen, - uint32_t rw_wc, - uint32_t *updatep, - int flags, - int *blockp, - uint32_t premgen) +kwq_handle_unlock(ksyn_wait_queue_t kwq, __unused uint32_t mgen, uint32_t rw_wc, + uint32_t *updatep, int flags, int *blockp, uint32_t premgen) { uint32_t low_writer, limitrdnum; int rwtype, error=0; - int allreaders, failed; + int allreaders, nfailed; uint32_t updatebits=0, numneeded = 0;; int prepost = flags & KW_UNLOCK_PREPOST; thread_t preth = THREAD_NULL; @@ -2067,7 +2140,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ]; kwq->kw_lastseqword = rw_wc; kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK); - kwq->kw_overlapwatch = 0; + kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD; error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest); #if __TESTPANICS__ @@ -2075,7 +2148,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, panic("rwunlock: cannot fails to slot next round of threads"); #endif /* __TESTPANICS__ */ - low_writer = lowest[KSYN_QUEUE_WRITER]; + low_writer = lowest[KSYN_QUEUE_WRITE]; allreaders = 0; updatebits = 0; @@ -2108,7 +2181,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, } else { // no writers at all // no other waiters only readers - kwq->kw_overlapwatch = 1; + kwq->kw_kflags |= KSYN_KWF_OVERLAP_GUARD; numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count; if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) { curthreturns = 1; @@ -2128,18 +2201,19 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, } - failed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, updatebits, &woken); - if (failed != 0) { - kwq->kw_pre_intrcount = failed; /* actually a count */ - kwq->kw_pre_intrseq = limitrdnum; - kwq->kw_pre_intrretbits = updatebits; - kwq->kw_pre_intrtype = PTH_RW_TYPE_READ; + nfailed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, + updatebits, &woken); + if (nfailed != 0) { + _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_READ, nfailed, + limitrdnum, updatebits); } error = 0; - if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0)) + if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) && + ((updatebits & PTH_RWL_WBIT) == 0)) { panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits); + } } break; @@ -2151,7 +2225,7 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) { block = 0; - if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) { + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) { updatebits |= PTH_RWL_WBIT; } th = preth; @@ -2161,23 +2235,23 
@@ kwq_handle_unlock(ksyn_wait_queue_t kwq, } else { /* we are not granting writelock to the preposting thread */ /* if there are writers present or the preposting write thread then W bit is to be set */ - if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 1 || + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count > 1 || (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) { updatebits |= PTH_RWL_WBIT; } /* setup next in the queue */ - kret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, NULL, updatebits); + kret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, NULL, updatebits); if (kret == KERN_NOT_WAITING) { - kwq->kw_pre_intrcount = 1; /* actually a count */ - kwq->kw_pre_intrseq = low_writer; - kwq->kw_pre_intrretbits = updatebits; - kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE; + _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1, + low_writer, updatebits); } error = 0; } kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits; - if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT)) + if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != + (PTH_RWL_KBIT | PTH_RWL_EBIT)) { panic("kwq_handle_unlock: writer lock granted but no ke set %x\n", updatebits); + } } break; @@ -2204,7 +2278,8 @@ ksyn_queue_init(ksyn_queue_t kq) } int -ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int fit) +ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, + uint32_t mgen, int fit) { ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi]; uint32_t lockseq = mgen & PTHRW_COUNT_MASK; @@ -2229,11 +2304,13 @@ ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint kq->ksynq_lastnum = lockseq; } } else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) { - /* During prepost when a thread is getting cancelled, we could have two with same seq */ + /* During prepost when a thread is getting cancelled, we could have + * two with same seq */ res = EBUSY; if (kwe->kwe_state == KWE_THREAD_PREPOST) { ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq); - if (tmp != NULL && tmp->kwe_uth != NULL && pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) { + if (tmp != NULL && tmp->kwe_uth != NULL && + pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) { TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list); res = 0; } @@ -2267,7 +2344,8 @@ ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint } void -ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe) +ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, + ksyn_waitq_element_t kwe) { if (kq->ksynq_count == 0) { panic("removing item from empty queue"); @@ -2308,7 +2386,8 @@ ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_elemen } ksyn_waitq_element_t -ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq) +ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, + uint32_t seq) { ksyn_waitq_element_t kwe; @@ -2334,7 +2413,8 @@ ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen) result = kwe; // KWE_THREAD_INWAIT must be strictly equal - if (kwe->kwe_state == KWE_THREAD_INWAIT && (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) { + if (kwe->kwe_state == KWE_THREAD_INWAIT && + (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) { result = NULL; } break; @@ -2345,7 +2425,8 @@ ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen) /* look for a thread at lockseq, a */ ksyn_waitq_element_t -ksyn_queue_find_signalseq(__unused 
ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq) +ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, + uint32_t uptoseq, uint32_t signalseq) { ksyn_waitq_element_t result = NULL; ksyn_waitq_element_t q_kwe, r_kwe; @@ -2358,7 +2439,8 @@ ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint3 return result; } } - if (q_kwe->kwe_state == KWE_THREAD_PREPOST || q_kwe->kwe_state == KWE_THREAD_BROADCAST) { + if (q_kwe->kwe_state == KWE_THREAD_PREPOST | + q_kwe->kwe_state == KWE_THREAD_BROADCAST) { /* match any prepost at our same uptoseq or any broadcast above */ if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) { continue; @@ -2399,6 +2481,10 @@ ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all) ksyn_waitq_element_t kwe; uint32_t tseq = upto & PTHRW_COUNT_MASK; ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi]; + uint32_t freed = 0, signaled = 0; + + PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_START, kwq->kw_addr, + kqi, upto, all); while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) { if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) { @@ -2411,17 +2497,28 @@ ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all) * return them as spurious wait so the cvar state gets * reset correctly. */ + + PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe, + kwq->kw_inqueue, 1); /* skip canceled ones */ /* wake the rest */ /* set M bit to indicate to waking CV to retun Inc val */ - (void)ksyn_signal(kwq, kqi, kwe, PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT); + (void)ksyn_signal(kwq, kqi, kwe, + PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT); + signaled++; } else { + PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe, + kwq->kw_inqueue, 2); ksyn_queue_remove_item(kwq, kq, kwe); - pthread_kern->zfree(kwe_zone, kwe); + zfree(kwe_zone, kwe); kwq->kw_fakecount--; + freed++; } } + + PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_END, kwq->kw_addr, freed, + signaled, kwq->kw_inqueue); } /*************************************************************************/ @@ -2483,7 +2580,8 @@ find_nexthighseq(ksyn_wait_queue_t kwq) } int -find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp) +find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, + uint32_t *countp) { int i; uint32_t count = 0; @@ -2540,10 +2638,13 @@ ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep) { ksyn_waitq_element_t kwe, newkwe; uint32_t updatebits = 0; - ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER]; + ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE]; struct ksyn_queue kfreeq; ksyn_queue_init(&kfreeq); + + PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_START, ckwq->kw_addr, upto, + ckwq->kw_inqueue, 0); retry: TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) { @@ -2555,11 +2656,14 @@ retry: if (kwe->kwe_state == KWE_THREAD_INWAIT) { // Wake only non-canceled threads waiting on this CV. 
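/*
 * [Editorial aside, not part of the patch] The signal and broadcast paths in
 * this hunk keep the condvar honest by balancing two generation words: L
 * (kw_lword) advances as waiters arrive and S (kw_sword) advances for every
 * wakeup granted or preposted; when the masked counts match, every wait has
 * been answered and the queue can be zeroed out. A tiny model of that
 * bookkeeping follows. The mask and increment only mirror the idea of
 * PTHRW_COUNT_MASK/PTHRW_INC; the authoritative values live in
 * synch_internal.h.
 */
#include <stdint.h>
#include <stdbool.h>

#define CV_COUNT_MASK 0xffffff00U   /* low byte is flag bits (illustrative) */
#define CV_SEQ_INC    0x00000100U   /* one waiter / one wakeup              */

struct cv_gen { uint32_t lword, sword; };

static void on_wait(struct cv_gen *cv)   { cv->lword += CV_SEQ_INC; }
static void on_wakeup(struct cv_gen *cv) { cv->sword += CV_SEQ_INC; }

static bool
cv_is_balanced(const struct cv_gen *cv)
{
	/* mirrors the (L & COUNT_MASK) == (S & COUNT_MASK) checks above */
	return (cv->lword & CV_COUNT_MASK) == (cv->sword & CV_COUNT_MASK);
}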
if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) { - (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT); + PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe, 0, 1); + (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT); updatebits += PTHRW_INC; } } else if (kwe->kwe_state == KWE_THREAD_BROADCAST || kwe->kwe_state == KWE_THREAD_PREPOST) { + PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe, + kwe->kwe_state, 2); ksyn_queue_remove_item(ckwq, kq, kwe); TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list); ckwq->kw_fakecount--; @@ -2571,27 +2675,34 @@ retry: /* Need to enter a broadcast in the queue (if not already at L == S) */ if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) { + PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, ckwq->kw_lword, + ckwq->kw_sword, 3); + newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist); if (newkwe == NULL) { ksyn_wqunlock(ckwq); - newkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone); + newkwe = (ksyn_waitq_element_t)zalloc(kwe_zone); TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list); ksyn_wqlock(ckwq); goto retry; } else { TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list); ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto); + PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, newkwe, 0, 4); } } // free up any remaining things stumbled across above while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) { TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list); - pthread_kern->zfree(kwe_zone, kwe); + zfree(kwe_zone, kwe); } + + PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_END, ckwq->kw_addr, + updatebits, 0, 0); if (updatep != NULL) { - *updatep = updatebits; + *updatep |= updatebits; } } @@ -2601,7 +2712,7 @@ ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits) if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) { if (ckwq->kw_inqueue != 0) { /* FREE THE QUEUE */ - ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 0); + ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 0); #if __TESTPANICS__ if (ckwq->kw_inqueue != 0) panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S"); @@ -2619,8 +2730,10 @@ ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits) void psynch_zoneinit(void) { - kwq_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue"); - kwe_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element"); + kwq_zone = zinit(sizeof(struct ksyn_wait_queue), + 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue"); + kwe_zone = zinit(sizeof(struct ksyn_waitq_element), + 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element"); } void * @@ -2641,13 +2754,14 @@ _pthread_get_thread_kwq(thread_t thread) * to pthread sync objects. */ void -_pthread_find_owner(thread_t thread, struct stackshot_thread_waitinfo * waitinfo) +_pthread_find_owner(thread_t thread, + struct stackshot_thread_waitinfo * waitinfo) { ksyn_wait_queue_t kwq = _pthread_get_thread_kwq(thread); switch (waitinfo->wait_type) { case kThreadWaitPThreadMutex: assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX); - waitinfo->owner = kwq->kw_owner; + waitinfo->owner = thread_tid(kwq->kw_owner); waitinfo->context = kwq->kw_addr; break; /* Owner of rwlock not stored in kernel space due to races. 
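/*
 * [Editorial aside, not part of the patch] kw_owner is now a referenced
 * thread_t rather than a bare tid, which is why the stackshot hook above
 * recovers the owner tid with thread_tid(). That forces a consistent handoff
 * pattern: owner updates happen under the kwq spinlock, the previous owner is
 * handed back to the caller, and its reference is only dropped after the lock
 * is released, the role _kwq_set_owner()/_kwq_cleanup_old_owner() play in this
 * patch. A generic sketch with invented thread_ref()/thread_unref() helpers:
 */
struct thread;
struct thread *thread_ref(struct thread *t);    /* take a +1 reference */
void thread_unref(struct thread *t);            /* drop a reference    */

struct owned_obj { struct thread *owner; };

/* Call with the object's spinlock held; hand the old owner back so the
 * caller can drop its reference once the spinlock is no longer held. */
static struct thread *
set_owner_locked(struct owned_obj *o, struct thread *new_owner)
{
	struct thread *old = o->owner;
	o->owner = new_owner ? thread_ref(new_owner) : 0;
	return old;
}

static void
cleanup_old_owner(struct thread **oldp)
{
	if (*oldp) {
		thread_unref(*oldp);
		*oldp = 0;
	}
}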
Punt diff --git a/kern/kern_trace.h b/kern/kern_trace.h index e65e7b9..2e59edc 100644 --- a/kern/kern_trace.h +++ b/kern/kern_trace.h @@ -39,7 +39,9 @@ // pthread tracing subclasses # define _TRACE_SUB_DEFAULT 0 # define _TRACE_SUB_WORKQUEUE 1 -# define _TRACE_SUB_MUTEX 2 +// WQ_TRACE_REQUESTS_SUBCLASS is 2, in xnu +# define _TRACE_SUB_MUTEX 3 +# define _TRACE_SUB_CONDVAR 4 #ifndef _PTHREAD_BUILDING_CODES_ @@ -62,14 +64,14 @@ VM_UNSLIDE(void* ptr) return (void*)unslid_ptr; } -# define PTHREAD_TRACE(x,a,b,c,d,e) \ - { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(x, a, b, c, d, e); } } +# define PTHREAD_TRACE(x,a,b,c,d) \ + { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, a, b, c, d, 0); } } -# define PTHREAD_TRACE_WQ(x,a,b,c,d,e) \ - { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(x, VM_UNSLIDE(a), b, c, d, e); } } +# define PTHREAD_TRACE_WQ(x,a,b,c,d) \ + { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, VM_UNSLIDE(a), b, c, d, 0); } } # define PTHREAD_TRACE_WQ_REQ(x,a,b,c,d,e) \ - { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(x, VM_UNSLIDE(a), VM_UNSLIDE(b), c, d, e); } } + { if (pthread_debug_tracing) { KERNEL_DEBUG_CONSTANT(TRACE_##x, VM_UNSLIDE(a), VM_UNSLIDE(b), c, d, e); } } #else // KERNEL @@ -138,5 +140,25 @@ TRACE_CODE(psynch_mutex_uunlock, _TRACE_SUB_MUTEX, 0x2); TRACE_CODE(psynch_ksyn_incorrect_owner, _TRACE_SUB_MUTEX, 0x3); TRACE_CODE(psynch_mutex_lock_updatebits, _TRACE_SUB_MUTEX, 0x4); TRACE_CODE(psynch_mutex_unlock_updatebits, _TRACE_SUB_MUTEX, 0x5); +TRACE_CODE(psynch_mutex_clearprepost, _TRACE_SUB_MUTEX, 0x6); +TRACE_CODE(psynch_mutex_kwqallocate, _TRACE_SUB_MUTEX, 0x7); +TRACE_CODE(psynch_mutex_kwqdeallocate, _TRACE_SUB_MUTEX, 0x8); +TRACE_CODE(psynch_mutex_kwqprepost, _TRACE_SUB_MUTEX, 0x9); +TRACE_CODE(psynch_mutex_markprepost, _TRACE_SUB_MUTEX, 0x10); +TRACE_CODE(psynch_mutex_kwqcollision, _TRACE_SUB_MUTEX, 0x11); +TRACE_CODE(psynch_ffmutex_lock_updatebits, _TRACE_SUB_MUTEX, 0x12); +TRACE_CODE(psynch_ffmutex_unlock_updatebits, _TRACE_SUB_MUTEX, 0x13); +TRACE_CODE(psynch_ffmutex_wake, _TRACE_SUB_MUTEX, 0x14); +TRACE_CODE(psynch_mutex_kwqsignal, _TRACE_SUB_MUTEX, 0x15); +TRACE_CODE(psynch_ffmutex_wait, _TRACE_SUB_MUTEX, 0x16); +TRACE_CODE(psynch_mutex_kwqwait, _TRACE_SUB_MUTEX, 0x17); + +TRACE_CODE(psynch_cvar_kwait, _TRACE_SUB_CONDVAR, 0x0); +TRACE_CODE(psynch_cvar_clrprepost, _TRACE_SUB_CONDVAR, 0x1); +TRACE_CODE(psynch_cvar_freeitems, _TRACE_SUB_CONDVAR, 0x2); +TRACE_CODE(psynch_cvar_signal, _TRACE_SUB_CONDVAR, 0x3); +TRACE_CODE(psynch_cvar_broadcast, _TRACE_SUB_CONDVAR, 0x5); +TRACE_CODE(psynch_cvar_zeroed, _TRACE_SUB_CONDVAR, 0x6); +TRACE_CODE(psynch_cvar_updateval, _TRACE_SUB_CONDVAR, 0x7); #endif // _KERN_TRACE_H_ diff --git a/kern/synch_internal.h b/kern/synch_internal.h index 6b22c41..1b9d6c2 100644 --- a/kern/synch_internal.h +++ b/kern/synch_internal.h @@ -24,6 +24,12 @@ #ifndef __SYNCH_INTERNAL_H__ #define __SYNCH_INTERNAL_H__ +// kwe_state +enum { + KWE_THREAD_INWAIT = 1, + KWE_THREAD_PREPOST, + KWE_THREAD_BROADCAST, +}; #define _PTHREAD_MTX_OPT_PSHARED 0x010 #define _PTHREAD_MTX_OPT_NOTIFY 0x1000 /* notify to drop mutex handling in cvwait */ @@ -79,8 +85,13 @@ #define is_rwl_readoverlap(x) (((x) & PTH_RWL_MBIT) != 0) // S word tests -#define is_rws_setseq(x) (((x) & PTH_RWS_SBIT)) -#define is_rws_setunlockinit(x) (((x) & PTH_RWS_IBIT)) +#define is_rws_sbit_set(x) (((x) & PTH_RWS_SBIT) != 0) +#define is_rws_unlockinit_set(x) (((x) & PTH_RWS_IBIT) != 0) +#define is_rws_savemask_set(x) (((x) & PTHRW_RWS_SAVEMASK) 
!= 0) +#define is_rws_pbit_set(x) (((x) & PTH_RWS_CV_PBIT) != 0) + +// kwe_flags +#define KWE_FLAG_LOCKPREPOST 0x1 // cvwait caused a lock prepost static inline int is_seqlower(uint32_t x, uint32_t y) diff --git a/kern/workqueue_internal.h b/kern/workqueue_internal.h index 28d870e..c044fe7 100644 --- a/kern/workqueue_internal.h +++ b/kern/workqueue_internal.h @@ -33,175 +33,18 @@ * duplicate definitions that used to exist in both projects, when separate. */ -/* workq_kernreturn commands */ -#define WQOPS_THREAD_RETURN 0x04 /* parks the thread back into the kernel */ -#define WQOPS_QUEUE_NEWSPISUPP 0x10 /* this is to check for newer SPI support */ -#define WQOPS_QUEUE_REQTHREADS 0x20 /* request number of threads of a prio */ -#define WQOPS_QUEUE_REQTHREADS2 0x30 /* request a number of threads in a given priority bucket */ -#define WQOPS_THREAD_KEVENT_RETURN 0x40 /* parks the thread after delivering the passed kevent array */ -#define WQOPS_SET_EVENT_MANAGER_PRIORITY 0x80 /* max() in the provided priority in the the priority of the event manager */ -#define WQOPS_THREAD_WORKLOOP_RETURN 0x100 /* parks the thread after delivering the passed kevent array */ -#define WQOPS_SHOULD_NARROW 0x200 /* checks whether we should narrow our concurrency */ - -/* flag values for upcall flags field, only 8 bits per struct threadlist */ -#define WQ_FLAG_THREAD_PRIOMASK 0x0000ffff -#define WQ_FLAG_THREAD_PRIOSHIFT 16 -#define WQ_FLAG_THREAD_OVERCOMMIT 0x00010000 /* thread is with overcommit prio */ -#define WQ_FLAG_THREAD_REUSE 0x00020000 /* thread is being reused */ -#define WQ_FLAG_THREAD_NEWSPI 0x00040000 /* the call is with new SPIs */ -#define WQ_FLAG_THREAD_KEVENT 0x00080000 /* thread is response to kevent req */ -#define WQ_FLAG_THREAD_EVENT_MANAGER 0x00100000 /* event manager thread */ -#define WQ_FLAG_THREAD_TSD_BASE_SET 0x00200000 /* tsd base has already been set */ -#define WQ_FLAG_THREAD_WORKLOOP 0x00400000 /* workloop thread */ - -#define WQ_THREAD_CLEANUP_QOS QOS_CLASS_DEFAULT - -#define WQ_KEVENT_LIST_LEN 16 // WORKQ_KEVENT_EVENT_BUFFER_LEN -#define WQ_KEVENT_DATA_SIZE (32 * 1024) - -/* These definitions are only available to the kext, to avoid bleeding constants and types across the boundary to - * the userspace library. - */ -#ifdef KERNEL - -/* These defines come from kern/thread.h but are XNU_KERNEL_PRIVATE so do not get - * exported to kernel extensions. - */ -#define SCHED_CALL_BLOCK 0x1 -#define SCHED_CALL_UNBLOCK 0x2 - -// kwe_state -enum { - KWE_THREAD_INWAIT = 1, - KWE_THREAD_PREPOST, - KWE_THREAD_BROADCAST, -}; - -/* old workq priority scheme */ - -#define WORKQUEUE_HIGH_PRIOQUEUE 0 /* high priority queue */ -#define WORKQUEUE_DEFAULT_PRIOQUEUE 1 /* default priority queue */ -#define WORKQUEUE_LOW_PRIOQUEUE 2 /* low priority queue */ -#define WORKQUEUE_BG_PRIOQUEUE 3 /* background priority queue */ - -#define WORKQUEUE_NUM_BUCKETS 7 - // Sometimes something gets passed a bucket number and we need a way to express -// that it's actually the event manager. Use the (n+1)th bucket for that. -#define WORKQUEUE_EVENT_MANAGER_BUCKET (WORKQUEUE_NUM_BUCKETS-1) - -/* wq_max_constrained_threads = max(64, N_CPU * WORKQUEUE_CONSTRAINED_FACTOR) - * This used to be WORKQUEUE_NUM_BUCKETS + 1 when NUM_BUCKETS was 4, yielding - * N_CPU * 5. When NUM_BUCKETS changed, we decided that the limit should - * not change. So the factor is now always 5. 
- */ -#define WORKQUEUE_CONSTRAINED_FACTOR 5 - -#define WORKQUEUE_OVERCOMMIT 0x10000 - -/* - * A thread which is scheduled may read its own th_priority field without - * taking the workqueue lock. Other fields should be assumed to require the - * lock. - */ -struct threadlist { - TAILQ_ENTRY(threadlist) th_entry; - thread_t th_thread; - struct workqueue *th_workq; - mach_vm_offset_t th_stackaddr; - mach_port_name_t th_thport; - uint16_t th_flags; - uint8_t th_upcall_flags; - uint8_t th_priority; -}; - -#define TH_LIST_INITED 0x0001 /* Set at thread creation. */ -#define TH_LIST_RUNNING 0x0002 /* On thrunlist, not parked. */ -#define TH_LIST_KEVENT 0x0004 /* Thread requested by kevent */ -#define TH_LIST_NEW 0x0008 /* First return to userspace */ -#define TH_LIST_BUSY 0x0010 /* Removed from idle list but not ready yet. */ -#define TH_LIST_KEVENT_BOUND 0x0020 /* Thread bound to kqueues */ -#define TH_LIST_CONSTRAINED 0x0040 /* Non-overcommit thread. */ -#define TH_LIST_EVENT_MGR_SCHED_PRI 0x0080 /* Non-QoS Event Manager */ -#define TH_LIST_UNBINDING 0x0100 /* Thread is unbinding during park */ -#define TH_LIST_REMOVING_VOUCHER 0x0200 /* Thread is removing its voucher */ -#define TH_LIST_PACING 0x0400 /* Thread is participating in pacing */ - -struct threadreq { - TAILQ_ENTRY(threadreq) tr_entry; - uint16_t tr_flags; - uint8_t tr_state; - uint8_t tr_priority; -}; -TAILQ_HEAD(threadreq_head, threadreq); - -#define TR_STATE_NEW 0 /* Not yet enqueued */ -#define TR_STATE_WAITING 1 /* Waiting to be serviced - on reqlist */ -#define TR_STATE_COMPLETE 2 /* Request handled - for caller to free */ -#define TR_STATE_DEAD 3 - -#define TR_FLAG_KEVENT 0x01 -#define TR_FLAG_OVERCOMMIT 0x02 -#define TR_FLAG_ONSTACK 0x04 -#define TR_FLAG_WORKLOOP 0x08 -#define TR_FLAG_NO_PACING 0x10 - -#if defined(__LP64__) -typedef unsigned __int128 wq_thactive_t; -#else -typedef uint64_t wq_thactive_t; -#endif - -struct workqueue { - proc_t wq_proc; - vm_map_t wq_map; - task_t wq_task; - - lck_spin_t wq_lock; - - thread_call_t wq_atimer_delayed_call; - thread_call_t wq_atimer_immediate_call; - - uint32_t _Atomic wq_flags; - uint32_t wq_timer_interval; - uint32_t wq_threads_scheduled; - uint32_t wq_constrained_threads_scheduled; - uint32_t wq_nthreads; - uint32_t wq_thidlecount; - uint32_t wq_event_manager_priority; - uint8_t wq_lflags; // protected by wqueue lock - uint8_t wq_paced; // protected by wqueue lock - uint16_t __wq_unused; - - TAILQ_HEAD(, threadlist) wq_thrunlist; - TAILQ_HEAD(, threadlist) wq_thidlelist; - TAILQ_HEAD(, threadlist) wq_thidlemgrlist; - - uint32_t wq_reqcount; /* number of elements on the following lists */ - struct threadreq_head wq_overcommit_reqlist[WORKQUEUE_EVENT_MANAGER_BUCKET]; - struct threadreq_head wq_reqlist[WORKQUEUE_EVENT_MANAGER_BUCKET]; - struct threadreq wq_event_manager_threadreq; - - struct threadreq *wq_cached_threadreq; - - uint16_t wq_thscheduled_count[WORKQUEUE_NUM_BUCKETS]; - _Atomic wq_thactive_t wq_thactive; - _Atomic uint64_t wq_lastblocked_ts[WORKQUEUE_NUM_BUCKETS]; -}; -#define WQ_EXITING 0x01 -#define WQ_ATIMER_DELAYED_RUNNING 0x02 -#define WQ_ATIMER_IMMEDIATE_RUNNING 0x04 - -#define WQL_ATIMER_BUSY 0x01 -#define WQL_ATIMER_WAITING 0x02 - -#define WORKQUEUE_MAXTHREADS 512 -#define WQ_STALLED_WINDOW_USECS 200 -#define WQ_REDUCE_POOL_WINDOW_USECS 5000000 -#define WQ_MAX_TIMER_INTERVAL_USECS 50000 - -#define WQ_THREADLIST_EXITING_POISON (void *)~0ul - -#endif // KERNEL +// that it's actually the event manager. Use the (0)th bucket for that. 
+#define WORKQ_THREAD_QOS_MIN (THREAD_QOS_MAINTENANCE) +#define WORKQ_THREAD_QOS_MAX (THREAD_QOS_LAST - 1) +#define WORKQ_THREAD_QOS_CLEANUP (THREAD_QOS_LEGACY) +#define WORKQ_THREAD_QOS_MANAGER (THREAD_QOS_LAST) // outside of MIN/MAX + +#define WORKQ_NUM_QOS_BUCKETS (WORKQ_THREAD_QOS_MAX) +#define WORKQ_NUM_BUCKETS (WORKQ_THREAD_QOS_MAX + 1) +#define WORKQ_IDX(qos) ((qos) - 1) // 0 based index + +// magical `nkevents` values for _pthread_wqthread +#define WORKQ_EXIT_THREAD_NKEVENT (-1) #endif // _WORKQUEUE_INTERNAL_H_ diff --git a/libpthread.xcodeproj/project.pbxproj b/libpthread.xcodeproj/project.pbxproj index 33df537..1c4fd1a 100644 --- a/libpthread.xcodeproj/project.pbxproj +++ b/libpthread.xcodeproj/project.pbxproj @@ -62,6 +62,20 @@ /* End PBXAggregateTarget section */ /* Begin PBXBuildFile section */ + 6E2A3BBE2101222F0003B53B /* stack_np.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E2A3BBD210122230003B53B /* stack_np.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 6E2A3BBF210122300003B53B /* stack_np.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E2A3BBD210122230003B53B /* stack_np.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 6E2A3BC0210122340003B53B /* stack_np.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E2A3BBD210122230003B53B /* stack_np.h */; }; + 6E5869C720C9040A00F1CB75 /* dependency_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5869C620C8FE8300F1CB75 /* dependency_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; + 6E5869C820C9040B00F1CB75 /* dependency_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5869C620C8FE8300F1CB75 /* dependency_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; + 6E5869C920C9040C00F1CB75 /* dependency_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 6E5869C620C8FE8300F1CB75 /* dependency_private.h */; }; + 6E5869CB20C9043200F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869CC20C9043B00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869CD20C9043B00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869CE20C9043C00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869CF20C9043C00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869D020C9043D00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869D120C9043D00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; + 6E5869D220C9043E00F1CB75 /* pthread_dependency.c in Sources */ = {isa = PBXBuildFile; fileRef = 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */; }; 6E8C16541B14F08A00C8987C /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EB232C91B0EB29D005915CE /* resolver.c */; }; 6E8C16551B14F08A00C8987C /* pthread.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325FA15B7513200270056 /* pthread.c */; }; 6E8C16561B14F08A00C8987C /* pthread_cancelable.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325F115B7513200270056 /* pthread_cancelable.c */; }; @@ -166,7 +180,6 @@ C9A1BF5015C9A59B006BB313 /* sched.h in 
Headers */ = {isa = PBXBuildFile; fileRef = C9A3260115B7513700270056 /* sched.h */; settings = {ATTRIBUTES = (Public, ); }; }; C9A1BF5315C9A9F5006BB313 /* pthread_cancelable_cancel.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A1BF5215C9A9F5006BB313 /* pthread_cancelable_cancel.c */; }; C9A1BF5515C9CB9D006BB313 /* pthread_cancelable_legacy.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A1BF5415C9CB9D006BB313 /* pthread_cancelable_legacy.c */; }; - C9A960B0183EB42700AE10C8 /* kern_policy.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A960AF183EB42700AE10C8 /* kern_policy.c */; }; C9BB478B15E6ABD900F135B7 /* workqueue_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325F915B7513200270056 /* workqueue_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; C9BB478D15E6ADF700F135B7 /* tsd_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325F415B7513200270056 /* tsd_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; C9CCFB9D18B6D0910060CAAE /* qos_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C99B17DA189C2E1B00991D38 /* qos_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; @@ -334,11 +347,15 @@ containerPortal = C9A325D915B7347000270056 /* Project object */; proxyType = 1; remoteGlobalIDString = E4F4498C1E82C1F000A7FB9A; - remoteInfo = "libpthread alt resolved"; + remoteInfo = "libpthread armv81 resolved"; }; /* End PBXContainerItemProxy section */ /* Begin PBXFileReference section */ + 6E2A3BBD210122230003B53B /* stack_np.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stack_np.h; sourceTree = ""; }; + 6E514A0220B67C0900844EE1 /* offsets.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = offsets.h; sourceTree = ""; }; + 6E5869C620C8FE8300F1CB75 /* dependency_private.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = dependency_private.h; sourceTree = ""; }; + 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = pthread_dependency.c; sourceTree = ""; }; 6E8C16801B14F08A00C8987C /* libsystem_pthread.dylib */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.dylib"; includeInIndex = 0; path = libsystem_pthread.dylib; sourceTree = BUILT_PRODUCTS_DIR; }; 6E8C16851B14F14000C8987C /* pthread_introspection.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = pthread_introspection.xcconfig; sourceTree = ""; }; 6EB232C91B0EB29D005915CE /* resolver.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = resolver.c; sourceTree = ""; }; @@ -444,7 +461,6 @@ C9A3260015B7513700270056 /* pthread_spis.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = pthread_spis.h; sourceTree = ""; }; C9A3260115B7513700270056 /* sched.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sched.h; sourceTree = ""; }; C9A3260C15B759B600270056 /* pthread.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = pthread.xcconfig; sourceTree = ""; }; - C9A960AF183EB42700AE10C8 /* kern_policy.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kern_policy.c; sourceTree = ""; }; C9A960B318452B2F00AE10C8 /* pthread.py */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.python; path = pthread.py; sourceTree = ""; }; C9A960B618452CDD00AE10C8 /* install-lldbmacros.sh */ 
= {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "install-lldbmacros.sh"; sourceTree = ""; }; C9C2212D15FA978D00447568 /* pthread.aliases */ = {isa = PBXFileReference; lastKnownFileType = text; path = pthread.aliases; sourceTree = ""; }; @@ -460,7 +476,7 @@ E4943AA71E80BD8400D2A961 /* resolver_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = resolver_internal.h; sourceTree = ""; }; E4D962F919086AD600E8A9F2 /* qos.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = qos.h; sourceTree = ""; }; E4D962FC19086C5700E8A9F2 /* install-sys-headers.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; path = "install-sys-headers.sh"; sourceTree = ""; }; - E4F449A01E82C1F000A7FB9A /* libpthread_alt.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libpthread_alt.a; sourceTree = BUILT_PRODUCTS_DIR; }; + E4F449A01E82C1F000A7FB9A /* libpthread_armv81.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libpthread_armv81.a; sourceTree = BUILT_PRODUCTS_DIR; }; E4F449A31E82CF0100A7FB9A /* resolver.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = resolver.xcconfig; sourceTree = ""; }; E4F449D41E82D03500A7FB9A /* libsystem_pthread.dylib */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.dylib"; includeInIndex = 0; path = libsystem_pthread.dylib; sourceTree = BUILT_PRODUCTS_DIR; }; FC30E28D16A747AD00A25B5F /* synch_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = synch_internal.h; sourceTree = ""; }; @@ -547,7 +563,6 @@ C9D9E8FE1626248800448CED /* pthread-Info.plist */, C9C533841607C928009988FA /* kern_internal.h */, C9169DDF1603DF9B005A2F8C /* kern_init.c */, - C9A960AF183EB42700AE10C8 /* kern_policy.c */, C9169DDB1603DE84005A2F8C /* kern_synch.c */, C9169DDC1603DE84005A2F8C /* kern_support.c */, C979E9FB18A1BC2A000951E5 /* kern_trace.h */, @@ -656,7 +671,7 @@ 6E8C16801B14F08A00C8987C /* libsystem_pthread.dylib */, C04545B81C584F4A006A53B3 /* libpthread.a */, E41505E71E818BEB00F243FB /* libpthread_mp.a */, - E4F449A01E82C1F000A7FB9A /* libpthread_alt.a */, + E4F449A01E82C1F000A7FB9A /* libpthread_armv81.a */, E4F449D41E82D03500A7FB9A /* libsystem_pthread.dylib */, ); name = Products; @@ -665,6 +680,7 @@ C9A325ED15B74FB600270056 /* src */ = { isa = PBXGroup; children = ( + 6E514A0220B67C0900844EE1 /* offsets.h */, C9A325F315B7513200270056 /* internal.h */, C9A325EF15B7513200270056 /* plockstat.d */, C9A325FA15B7513200270056 /* pthread.c */, @@ -674,6 +690,7 @@ C9A325F215B7513200270056 /* pthread_cond.c */, 924D8EDE1C11832A002AC2BC /* pthread_cwd.c */, C9A325F515B7513200270056 /* pthread_mutex.c */, + 6E5869CA20C9043200F1CB75 /* pthread_dependency.c */, C9A325F615B7513200270056 /* pthread_rwlock.c */, C975D5DC15C9D16B0098ECD8 /* pthread_support.c */, C9A325F815B7513200270056 /* pthread_tsd.c */, @@ -694,6 +711,7 @@ C9A3260015B7513700270056 /* pthread_spis.h */, C9A3260115B7513700270056 /* sched.h */, C98C95D818FF1F4E005654FB /* spawn.h */, + 6E2A3BBD210122230003B53B /* stack_np.h */, C9244C1A185FCFED00075748 /* qos.h */, ); path = pthread; @@ -756,6 +774,7 @@ E4657D4017284F7B007D1847 /* introspection_private.h */, C99B17DA189C2E1B00991D38 /* qos_private.h */, E4063CF21906B4FB000202F9 /* qos.h */, + 6E5869C620C8FE8300F1CB75 /* dependency_private.h */, ); path = private; sourceTree = ""; @@ 
-805,6 +824,7 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( + 6E2A3BC0210122340003B53B /* stack_np.h in Headers */, 6E8C16711B14F08A00C8987C /* posix_sched.h in Headers */, 6E8C166F1B14F08A00C8987C /* introspection_private.h in Headers */, E41A64AE1E83C470009479A9 /* introspection.h in Headers */, @@ -819,6 +839,7 @@ 6E8C166B1B14F08A00C8987C /* pthread_impl.h in Headers */, 6E8C166D1B14F08A00C8987C /* pthread_spis.h in Headers */, 6E8C166E1B14F08A00C8987C /* sched.h in Headers */, + 6E5869C920C9040C00F1CB75 /* dependency_private.h in Headers */, 6E8C16751B14F08A00C8987C /* spawn.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; @@ -848,6 +869,7 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( + 6E2A3BBE2101222F0003B53B /* stack_np.h in Headers */, C9244C1B185FD33000075748 /* qos.h in Headers */, C9A1BF4D15C9A58E006BB313 /* pthread.h in Headers */, C9A1BF4E15C9A594006BB313 /* pthread_impl.h in Headers */, @@ -862,6 +884,7 @@ C98C95D918FF1F4E005654FB /* spawn.h in Headers */, C99AD87C15DEC5290009A6F8 /* spinlock_private.h in Headers */, C9BB478B15E6ABD900F135B7 /* workqueue_private.h in Headers */, + 6E5869C720C9040A00F1CB75 /* dependency_private.h in Headers */, C9153096167ACC2B006BB094 /* private.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; @@ -877,6 +900,7 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( + 6E2A3BBF210122300003B53B /* stack_np.h in Headers */, E4F449BE1E82D03500A7FB9A /* qos.h in Headers */, E4F449BF1E82D03500A7FB9A /* pthread.h in Headers */, E4F449C01E82D03500A7FB9A /* pthread_impl.h in Headers */, @@ -891,6 +915,7 @@ E4F449C91E82D03500A7FB9A /* spawn.h in Headers */, E4F449CA1E82D03500A7FB9A /* spinlock_private.h in Headers */, E4F449CB1E82D03500A7FB9A /* workqueue_private.h in Headers */, + 6E5869C820C9040B00F1CB75 /* dependency_private.h in Headers */, E4F449CC1E82D03500A7FB9A /* private.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; @@ -1044,9 +1069,9 @@ productReference = E41505E71E818BEB00F243FB /* libpthread_mp.a */; productType = "com.apple.product-type.library.static"; }; - E4F4498C1E82C1F000A7FB9A /* libpthread alt resolved */ = { + E4F4498C1E82C1F000A7FB9A /* libpthread armv81 resolved */ = { isa = PBXNativeTarget; - buildConfigurationList = E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread alt resolved" */; + buildConfigurationList = E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread armv81 resolved" */; buildPhases = ( E4F4498D1E82C1F000A7FB9A /* Sources */, E4F4499C1E82C1F000A7FB9A /* Symlink normal variant */, @@ -1055,9 +1080,9 @@ ); dependencies = ( ); - name = "libpthread alt resolved"; - productName = libpthread_alt.a; - productReference = E4F449A01E82C1F000A7FB9A /* libpthread_alt.a */; + name = "libpthread armv81 resolved"; + productName = libpthread_armv81.a; + productReference = E4F449A01E82C1F000A7FB9A /* libpthread_armv81.a */; productType = "com.apple.product-type.library.static"; }; E4F449A41E82D03500A7FB9A /* libsystem_pthread noresolver */ = { @@ -1118,7 +1143,7 @@ E4F449A41E82D03500A7FB9A /* libsystem_pthread noresolver */, 6E8C16511B14F08A00C8987C /* libsystem_pthread introspection */, E41505D01E818BEB00F243FB /* libpthread mp resolved */, - E4F4498C1E82C1F000A7FB9A /* libpthread alt resolved */, + E4F4498C1E82C1F000A7FB9A /* libpthread armv81 resolved */, C04545A21C584F4A006A53B3 /* libpthread generic */, C90E7A9E15DC3C3800A06D48 /* libpthread dyld */, 
74E594911613AAF4006C417B /* libpthread eOS */, @@ -1389,6 +1414,7 @@ 6E8C16631B14F08A00C8987C /* pthread_support.c in Sources */, 6E8C16641B14F08A00C8987C /* thread_setup.c in Sources */, 6E8C16651B14F08A00C8987C /* pthread_atfork.c in Sources */, + 6E5869CD20C9043B00F1CB75 /* pthread_dependency.c in Sources */, 6E8C16661B14F08A00C8987C /* pthread_asm.s in Sources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -1400,6 +1426,7 @@ 6EB232D01B0EB325005915CE /* resolver.c in Sources */, 74E594931613AAF4006C417B /* pthread.c in Sources */, 74E594941613AAF4006C417B /* pthread_cancelable.c in Sources */, + 6E5869D220C9043E00F1CB75 /* pthread_dependency.c in Sources */, 74E594A61613AB10006C417B /* pthread_cancelable_cancel.c in Sources */, 74E594951613AAF4006C417B /* pthread_cond.c in Sources */, 74E594961613AAF4006C417B /* pthread_mutex.c in Sources */, @@ -1421,6 +1448,7 @@ C04545A41C584F4A006A53B3 /* resolver.c in Sources */, C04545A51C584F4A006A53B3 /* pthread.c in Sources */, C04545A61C584F4A006A53B3 /* pthread_cancelable.c in Sources */, + 6E5869D020C9043D00F1CB75 /* pthread_dependency.c in Sources */, C04545A71C584F4A006A53B3 /* pthread_cancelable_cancel.c in Sources */, C04545A81C584F4A006A53B3 /* pthread_cond.c in Sources */, C04545A91C584F4A006A53B3 /* pthread_mutex.c in Sources */, @@ -1445,6 +1473,7 @@ C90E7AA515DC3C9D00A06D48 /* pthread_cancelable.c in Sources */, C90E7AA615DC3C9D00A06D48 /* pthread_cond.c in Sources */, C90E7AA715DC3C9D00A06D48 /* pthread_mutex.c in Sources */, + 6E5869D120C9043D00F1CB75 /* pthread_dependency.c in Sources */, C90E7AA815DC3C9D00A06D48 /* pthread_rwlock.c in Sources */, C90E7AA915DC3C9D00A06D48 /* pthread_support.c in Sources */, C90E7AAA15DC3C9D00A06D48 /* pthread_tsd.c in Sources */, @@ -1476,6 +1505,7 @@ C975D5DD15C9D16B0098ECD8 /* pthread_support.c in Sources */, C948FCF715D1D1E100180BF5 /* thread_setup.c in Sources */, C90E7AB815DC40D900A06D48 /* pthread_atfork.c in Sources */, + 6E5869CB20C9043200F1CB75 /* pthread_dependency.c in Sources */, C99AD88015E2D8B50009A6F8 /* pthread_asm.s in Sources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -1485,7 +1515,6 @@ buildActionMask = 2147483647; files = ( C9169DDE1603DE84005A2F8C /* kern_support.c in Sources */, - C9A960B0183EB42700AE10C8 /* kern_policy.c in Sources */, C9169DE01603DF9B005A2F8C /* kern_init.c in Sources */, C9D75E4216127B3900C2FB26 /* kern_synch.c in Sources */, ); @@ -1498,6 +1527,7 @@ E41505D21E818BEB00F243FB /* resolver.c in Sources */, E41505D31E818BEB00F243FB /* pthread.c in Sources */, E41505D41E818BEB00F243FB /* pthread_cancelable.c in Sources */, + 6E5869CE20C9043C00F1CB75 /* pthread_dependency.c in Sources */, E41505D51E818BEB00F243FB /* pthread_cancelable_cancel.c in Sources */, E41505D61E818BEB00F243FB /* pthread_cond.c in Sources */, E41505D71E818BEB00F243FB /* pthread_mutex.c in Sources */, @@ -1519,6 +1549,7 @@ E4F4498E1E82C1F000A7FB9A /* resolver.c in Sources */, E4F4498F1E82C1F000A7FB9A /* pthread.c in Sources */, E4F449901E82C1F000A7FB9A /* pthread_cancelable.c in Sources */, + 6E5869CF20C9043C00F1CB75 /* pthread_dependency.c in Sources */, E4F449911E82C1F000A7FB9A /* pthread_cancelable_cancel.c in Sources */, E4F449921E82C1F000A7FB9A /* pthread_cond.c in Sources */, E4F449931E82C1F000A7FB9A /* pthread_mutex.c in Sources */, @@ -1554,6 +1585,7 @@ E4F449B81E82D03500A7FB9A /* pthread_support.c in Sources */, E4F449B91E82D03500A7FB9A /* thread_setup.c in Sources */, E4F449BA1E82D03500A7FB9A /* pthread_atfork.c in Sources */, + 6E5869CC20C9043B00F1CB75 /* 
pthread_dependency.c in Sources */, E4F449BB1E82D03500A7FB9A /* pthread_asm.s in Sources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -1628,7 +1660,7 @@ }; E4F449A21E82C5A400A7FB9A /* PBXTargetDependency */ = { isa = PBXTargetDependency; - target = E4F4498C1E82C1F000A7FB9A /* libpthread alt resolved */; + target = E4F4498C1E82C1F000A7FB9A /* libpthread armv81 resolved */; targetProxy = E4F449A11E82C5A400A7FB9A /* PBXContainerItemProxy */; }; /* End PBXTargetDependency section */ @@ -1832,7 +1864,7 @@ isa = XCBuildConfiguration; baseConfigurationReference = E41505E81E818D4D00F243FB /* resolved.xcconfig */; buildSettings = { - RESOLVED_VARIANT = alt; + RESOLVED_VARIANT = armv81; }; name = Release; }; @@ -1840,7 +1872,7 @@ isa = XCBuildConfiguration; baseConfigurationReference = E41505E81E818D4D00F243FB /* resolved.xcconfig */; buildSettings = { - RESOLVED_VARIANT = alt; + RESOLVED_VARIANT = armv81; }; name = Debug; }; @@ -1987,7 +2019,7 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; - E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread alt resolved" */ = { + E4F4499D1E82C1F000A7FB9A /* Build configuration list for PBXNativeTarget "libpthread armv81 resolved" */ = { isa = XCConfigurationList; buildConfigurations = ( E4F4499E1E82C1F000A7FB9A /* Release */, diff --git a/lldbmacros/init.py b/lldbmacros/init.py new file mode 100644 index 0000000..af7fe69 --- /dev/null +++ b/lldbmacros/init.py @@ -0,0 +1,310 @@ +from xnu import * +import struct + +def GetSeqCount(seq): + return (seq >> 8) + +def GetLSeqBits(seq): + rv = "" + if seq & 0x1: + rv += "K" + if seq & 0x2: + rv += "E" + if seq & 0x4: + rv += "W" + if seq & 0x20: + rv += "M" + if seq & 0x40: + rv += "U" + if seq & 0x80: + rv += "I" + return rv + +def GetSSeqBits(seq): + rv = "" + if seq & 0x1: + rv += "S" + if seq & 0x2: + rv += "I" + if seq & 0x4: + rv += "Ws" + return rv + +def GetLSeqSummary(seq): + return "{:d} {:s}".format(GetSeqCount(seq), GetLSeqBits(seq)) + +def GetSSeqSummary(seq): + return "{:d} {:s}".format(GetSeqCount(seq), GetSSeqBits(seq)) + +@header("{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}".format('sig', 'tid', 'options', 'lseq', 'useq')) +def GetUserMutexSummary(task, uaddr): + if int(task.t_flags) & 0x1: + mtxlayout = "QIIhhIQIII" + padoffset = 1 + else: + mtxlayout = "QIIhhQIII" + padoffset = 0 + + data = GetUserDataAsString(task, unsigned(uaddr), struct.calcsize(mtxlayout)) + info = struct.unpack(mtxlayout, data) + + format = "{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}" + sigstr = str("{0: <#020x}".format(info[0])) + + # the options field dictates whether we were created misaligned + if info[2] & 0x800: + lseq = info[7+padoffset] + useq = info[8+padoffset] + else: + lseq = info[6+padoffset] + useq = info[7+padoffset] + + return format.format(sigstr, hex(info[5+padoffset]), hex(info[2]), hex(lseq), hex(useq)) + +@lldb_command('showusermutex') +def PthreadShowUserMutex(cmd_args=None): + """ + display information about a userspace mutex at a given address + Syntax: (lldb) showusermutex + """ + if not cmd_args: + raise ArgumentError("No arguments passed") + task = kern.GetValueFromAddress(cmd_args[0], "task_t") + uaddr = kern.GetValueFromAddress(cmd_args[1], "user_addr_t") + + print GetUserMutexSummary.header + print GetUserMutexSummary(task, uaddr) + +@lldb_type_summary(['ksyn_wait_queue *', 'ksyn_wait_queue_t']) +@header("{:<20s} {:<20s} {:<10s} {:<6s} {:<6s} {:<8s} {:<8s} {:<8s} {:<8s}".format('kwq', 'uaddr', 'type', 'pflags', 'kflags', 
'refs', 'indrop', 'waiters', 'preposts')) +def GetKwqSummary(kwq): + format = "{:<#20x} {:<#20x} {:<10s} {:<6s} {:<6s} {:<8d} {:<8d} {:<8d} {:<8d}\n" + kwq = Cast(kwq, "ksyn_wait_queue_t") + + kwqtype = "" + if kwq.kw_type & 0xff == 0x01: + kwqtype = "mtx" + if kwq.kw_type & 0xff == 0x02: + kwqtype = "cvar" + if kwq.kw_type & 0xff == 0x04: + kwqtype = "rwl" + if kwq.kw_type & 0xff == 0x05: + kwqtype = "sema" + + if kwq.kw_type & 0x1000 == 0x1000: + kwqtype += "W" # INWAIT + if kwq.kw_type & 0x2000 == 0x2000: + kwqtype += "D" # INDROP + + pflags = "" + if kwq.kw_pflags & 0x2: + pflags += "H" # INHASH + if kwq.kw_pflags & 0x4: + pflags += "S" # SHARED + if kwq.kw_pflags & 0x8: + pflags += "W" # WAITING + if kwq.kw_pflags & 0x10: + pflags += "F" # FREELIST + + kflags = "" + if kwq.kw_kflags & 0x1: + kflags += "C" # INITCLEARED + if kwq.kw_kflags & 0x2: + kflags += "Z" # ZEROED + if kwq.kw_kflags & 0x4: + kflags += "Q" # QOS APPLIED + if kwq.kw_kflags & 0x8: + kflags += "O" # OVERLAP + + rs = format.format(kwq, kwq.kw_addr, kwqtype, pflags, kflags, kwq.kw_iocount, kwq.kw_dropcount, kwq.kw_inqueue, kwq.kw_fakecount) + + rs += "\t{:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s}\n".format('lowest', 'highest', 'lword', 'uword', 'sword', 'last', 'next') + rs += "\t{:<10d} {:<10d} {:<10s} {:<10d} {:<10s} {:<10s} {:<10s}\n".format( + GetSeqCount(kwq.kw_lowseq), GetSeqCount(kwq.kw_highseq), + GetLSeqSummary(kwq.kw_lword), GetSeqCount(kwq.kw_uword), + GetSSeqSummary(kwq.kw_sword), GetSSeqSummary(kwq.kw_lastseqword), + GetSSeqSummary(kwq.kw_nextseqword)) + + rs += "\t{:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s} {:<10s}\n".format( + 'pposts', 'lseq', 'sseq', 'intr', 'count', 'seq', 'bits') + + intr_type = "NONE" + if kwq.kw_intr.type == 0x1: + intr_type = "READ" + elif kwq.kw_intr.type == 0x2: + intr_type = "WRITE" + + rs += "\t{:<10d} {:<10s} {:<10s} {:<10s} {:<10d} {:<10s} {:<10s}\n".format( + kwq.kw_prepost.count, + GetLSeqSummary(kwq.kw_prepost.lseq), GetSSeqSummary(kwq.kw_prepost.sseq), + intr_type, kwq.kw_intr.count, + GetSSeqSummary(kwq.kw_intr.seq), GetSSeqSummary(kwq.kw_intr.returnbits)) + + rs += "\twaiting readers:\n" + for kwe in IterateTAILQ_HEAD(kwq.kw_ksynqueues[0].ksynq_kwelist, 'kwe_list'): + rs += "\t" + GetKweSummary.header + "\n" + rs += "\t" + GetKweSummary(kwe) + "\n" + + rs += "\twaiting writers:\n" + for kwe in IterateTAILQ_HEAD(kwq.kw_ksynqueues[1].ksynq_kwelist, 'kwe_list'): + rs += "\t" + GetKweSummary.header + "\n" + rs += "\t" + GetKweSummary(kwe) + "\n" + + if kwq.kw_turnstile: + rs += GetTurnstileSummary.header + "\n" + rs += GetTurnstileSummary(Cast(kwq.kw_turnstile, "struct turnstile *")) + + return rs + +@lldb_type_summary(['ksyn_waitq_element *', 'ksyn_waitq_element_t']) +@header("{:<20s} {:<20s} {:<10s} {:<10s} {:<20s} {:<20s}".format('kwe', 'kwq', 'lseq', 'state', 'uthread', 'thread')) +def GetKweSummary(kwe): + format = "{:<#20x} {:<#20x} {:<10s} {:<10s} {:<#20x} {:<#20x}" + kwe = Cast(kwe, 'struct ksyn_waitq_element *') + state = "" + if kwe.kwe_state == 1: + state = "INWAIT" + elif kwe.kwe_state == 2: + state = "PPOST" + elif kwe.kwe_state == 3: + state = "BROAD" + else: + state = "{:#10x}".format(kwe.kwe_state) + return format.format(kwe, kwe.kwe_kwqqueue, GetLSeqSummary(kwe.kwe_lockseq), state, kwe.kwe_uth, kwe.kwe_thread) + +@header("{0: <24s} {1: <24s} {2: <24s}".format('thread', 'thread_id', 'uthread')) +def GetPthreadSummary(thread): + format = "{0: <24s} {1: <24s} {2: <24s}" + + threadstr = str("{0: <#020x}".format(thread)) + if 
int(thread.static_param): + threadstr += "[WQ]" + + uthread = Cast(thread.uthread, "uthread_t") + uthreadstr = str("{0: <#020x}".format(uthread)) + + + return format.format(threadstr, hex(thread.thread_id), uthreadstr) + +@header("{0: <24s} {1: <24s} {2: <10s} {3: <10s} {4: <10s} {5: <10s} {6: <10s}".format('proc', 'wq', 'sched', 'req', 'idle', 'wq_flags', 'wq_lflags')) +def GetPthreadWorkqueueSummary(wq): + format = "{0: <24s} {1: <24s} {2: <10d} {3: <10d} {4: <10d} {5: <10s} {6: <10s}" + procstr = str("{0: <#020x}".format(wq.wq_proc)) + wqstr = str("{0: <#020x}".format(wq)) + + flags = [] + if wq.wq_flags & 0x1: + flags.append("I") + if wq.wq_flags & 0x2: + flags.append("R") + if wq.wq_flags & 0x4: + flags.append("E") + + wqflags = [] + if wq.wq_lflags & 0x1: + wqflags.append("B") + if wq.wq_lflags & 0x2: + wqflags.append("W") + if wq.wq_lflags & 0x4: + wqflags.append("C") + if wq.wq_lflags & 0x8: + wqflags.append("L") + + return format.format(procstr, wqstr, wq.wq_threads_scheduled, wq.wq_reqcount, wq.wq_thidlecount, "".join(flags), "".join(wqflags)) + +@header("{0: <24s} {1: <5s} {2: <5s} {3: <5s} {4: <5s} {5: <5s} {6: <5s} {7: <5s}".format('category', 'uint', 'uinit', 'lgcy', 'util', 'bckgd', 'maint', 'event')) +def GetPthreadWorkqueueDetail(wq): + format = " {0: <22s} {1: <5d} {2: <5d} {3: <5d} {4: <5d} {5: <5d} {6: <5d} {7: <5d}" + # requests + schedstr = format.format('scheduled', wq.wq_thscheduled_count[0], wq.wq_thscheduled_count[1], wq.wq_thscheduled_count[2], wq.wq_thscheduled_count[3], wq.wq_thscheduled_count[4], wq.wq_thscheduled_count[5], wq.wq_thscheduled_count[6]) + activestr = format.format('active', wq.wq_thactive_count[0], wq.wq_thactive_count[1], wq.wq_thactive_count[2], wq.wq_thactive_count[3], wq.wq_thactive_count[4], wq.wq_thactive_count[5], wq.wq_thactive_count[6]) + return "\n".join([schedstr, activestr]) + +@lldb_command('showthreadpsynch') +def PthreadCurrentMutex(cmd_args=None): + """ + display information about a thread's pthread state + Syntax: (lldb) showthreadpsync + """ + if not cmd_args: + raise ArgumentError("No arguments passed") + + thread = kern.GetValueFromAddress(cmd_args[0], "thread_t") + print GetPthreadSummary.header + print GetPthreadSummary(thread) + + uthread = Cast(thread.uthread, "uthread_t") + kwe = Cast(addressof(uthread.uu_save.uus_kwe), 'struct ksyn_waitq_element *') + if not kwe or not kwe.kwe_kwqqueue: + print GetKweSummary.header + print GetKweSummary(kwe) + else: + print GetKwqSummary.header + print GetKwqSummary(kwe.kwe_kwqqueue) + +@lldb_command('showpthreadkwq') +def PthreadShowKsynQueue(cmd_args=None): + """ + display information about a pthread ksyn_wait_queue_t + Syntax: (lldb) showpthreadkwq + """ + if not cmd_args: + raise ArgumentError("No arguments passed") + + kwq = kern.GetValueFromAddress(cmd_args[0], "ksyn_wait_queue_t") + print GetKwqSummary.header + print GetKwqSummary(kwq) + +@lldb_command('showpthreadkwe') +def PthreadShowKsynElement(cmd_args=None): + """ + display information about a thread's ksyn_waitq_element + Syntax: (lldb) showpthreadkwe + """ + if not cmd_args: + raise ArgumentError("No arguments passed") + + kwe = kern.GetValueFromAddress(cmd_args[0], "struct ksyn_waitq_element *") + print GetKweSummary.header + print GetKweSummary(kwe) + +@lldb_command('showpthreadworkqueue') +def ShowPthreadWorkqueue(cmd_args=None): + """ + display information about a processes' pthread workqueue + Syntax: (lldb) showpthreadworkqueue + """ + + if not cmd_args: + raise ArgumentError("No arguments passed") + + proc = 
kern.GetValueFromAddress(cmd_args[0], "proc_t") + wq = Cast(proc.p_wqptr, "struct workqueue *"); + + print GetPthreadWorkqueueSummary.header + print GetPthreadWorkqueueSummary(wq) + + print GetPthreadWorkqueueDetail.header + print GetPthreadWorkqueueDetail(wq) + +def IterateTAILQ_HEAD(headval, element_name): + """ iterate over a TAILQ_HEAD in kernel. refer to bsd/sys/queue.h + params: + headval - value : value object representing the head of the list + element_name- str : string name of the field which holds the list links. + returns: + A generator does not return. It is used for iterating. + value : an object that is of type as headval->tqh_first. Always a pointer object + example usage: + list_head = kern.GetGlobalVariable('mountlist') + for entryobj in IterateTAILQ_HEAD(list_head, 'mnt_list'): + print GetEntrySummary(entryobj) + """ + iter_val = headval.tqh_first + while unsigned(iter_val) != 0 : + yield iter_val + iter_val = iter_val.__getattr__(element_name).tqe_next + #end of yield loop + +def __lldb_init_module(debugger, internal_dict): + pass diff --git a/lldbmacros/pthread.py b/lldbmacros/pthread.py deleted file mode 100644 index a24779c..0000000 --- a/lldbmacros/pthread.py +++ /dev/null @@ -1,152 +0,0 @@ -from xnu import * -import struct - -@header("{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}".format('sig', 'tid', 'options', 'lseq', 'useq')) -def GetUserMutexSummary(task, uaddr): - if int(task.t_flags) & 0x1: - mtxlayout = "QIIhhIQIII" - padoffset = 1 - else: - mtxlayout = "QIIhhQIII" - padoffset = 0 - - data = GetUserDataAsString(task, uaddr, struct.calcsize(mtxlayout)) - info = struct.unpack(mtxlayout, data) - - format = "{0: <24s} {1: <16s} {2: <16s} {3: <16s} {4: <16s}" - sigstr = str("{0: <#020x}".format(info[0])) - - # the options field dictates whether we were created misaligned - if info[2] & 0x800: - lseq = info[7+padoffset] - useq = info[8+padoffset] - else: - lseq = info[6+padoffset] - useq = info[7+padoffset] - - return format.format(sigstr, hex(info[5+padoffset]), hex(info[2]), hex(lseq), hex(useq)) - -@lldb_command('showusermutex') -def PthreadShowUserMutex(cmd_args=None): - """ - display information about a userspace mutex at a given address - Syntax: (lldb) showusermutex - """ - if not cmd_args: - raise ArgumentError("No arguments passed") - task = kern.GetValueFromAddress(cmd_args[0], "task_t") - uaddr = kern.GetValueFromAddress(cmd_args[1], "user_addr_t") - - print GetUserMutexSummary.header - print GetUserMutexSummary(task, uaddr) - -@lldb_type_summary(['ksyn_waitq_element *', 'ksyn_waitq_element_t']) -@header("{0: <24s} {1: <24s} {2: <24s} {3: <10s}".format('kwe', 'kwq', 'uaddr', 'type')) -def GetKweSummary(kwe): - format = "{0: <24s} {1: <24s} {2: <24s} {3: <10s}" - kwe = Cast(addressof(kwe), "ksyn_waitq_element_t") - kwestr = str("{0: <#020x}".format(kwe)) - - kwq = Cast(kwe.kwe_kwqqueue, "ksyn_wait_queue_t") - kwqstr = str("{0: <#020x}".format(kwq)) - uaddrstr = str("{0: <#020x}".format(kwq.kw_addr)) - - kwqtype = "" - if kwq.kw_type & 0xff == 0x01: - kwqtype = "mtx" - if kwq.kw_type & 0xff == 0x02: - kwqtype = "cvar" - if kwq.kw_type & 0xff == 0x04: - kwqtype = "rwlock" - if kwq.kw_type & 0xff == 0x05: - kwqtype = "sema" - - return format.format(kwestr, kwqstr, uaddrstr, kwqtype) - -@header("{0: <24s} {1: <24s} {2: <24s}".format('thread', 'thread_id', 'uthread')) -def GetPthreadSummary(thread): - format = "{0: <24s} {1: <24s} {2: <24s}" - - threadstr = str("{0: <#020x}".format(thread)) - if int(thread.static_param): - threadstr += "[WQ]" - - uthread 
= Cast(thread.uthread, "uthread_t") - uthreadstr = str("{0: <#020x}".format(uthread)) - - - return format.format(threadstr, hex(thread.thread_id), uthreadstr) - -@header("{0: <24s} {1: <24s} {2: <10s} {3: <10s} {4: <10s} {5: <10s} {6: <10s}".format('proc', 'wq', 'sched', 'req', 'idle', 'wq_flags', 'wq_lflags')) -def GetPthreadWorkqueueSummary(wq): - format = "{0: <24s} {1: <24s} {2: <10d} {3: <10d} {4: <10d} {5: <10s} {6: <10s}" - procstr = str("{0: <#020x}".format(wq.wq_proc)) - wqstr = str("{0: <#020x}".format(wq)) - - flags = [] - if wq.wq_flags & 0x1: - flags.append("I") - if wq.wq_flags & 0x2: - flags.append("R") - if wq.wq_flags & 0x4: - flags.append("E") - - wqflags = [] - if wq.wq_lflags & 0x1: - wqflags.append("B") - if wq.wq_lflags & 0x2: - wqflags.append("W") - if wq.wq_lflags & 0x4: - wqflags.append("C") - if wq.wq_lflags & 0x8: - wqflags.append("L") - - return format.format(procstr, wqstr, wq.wq_threads_scheduled, wq.wq_reqcount, wq.wq_thidlecount, "".join(flags), "".join(wqflags)) - -@header("{0: <24s} {1: <5s} {2: <5s} {3: <5s} {4: <5s} {5: <5s} {6: <5s} {7: <5s}".format('category', 'uint', 'uinit', 'lgcy', 'util', 'bckgd', 'maint', 'event')) -def GetPthreadWorkqueueDetail(wq): - format = " {0: <22s} {1: <5d} {2: <5d} {3: <5d} {4: <5d} {5: <5d} {6: <5d} {7: <5d}" - # requests - schedstr = format.format('scheduled', wq.wq_thscheduled_count[0], wq.wq_thscheduled_count[1], wq.wq_thscheduled_count[2], wq.wq_thscheduled_count[3], wq.wq_thscheduled_count[4], wq.wq_thscheduled_count[5], wq.wq_thscheduled_count[6]) - activestr = format.format('active', wq.wq_thactive_count[0], wq.wq_thactive_count[1], wq.wq_thactive_count[2], wq.wq_thactive_count[3], wq.wq_thactive_count[4], wq.wq_thactive_count[5], wq.wq_thactive_count[6]) - return "\n".join([schedstr, activestr]) - -@lldb_command('showpthreadstate') -def PthreadCurrentMutex(cmd_args=None): - """ - display information about a thread's pthread state - Syntax: (lldb) showpthreadstate - """ - if not cmd_args: - raise ArgumentError("No arguments passed") - - thread = kern.GetValueFromAddress(cmd_args[0], "thread_t") - print GetPthreadSummary.header - print GetPthreadSummary(thread) - - uthread = Cast(thread.uthread, "uthread_t") - kwe = addressof(uthread.uu_kevent.uu_kwe) - print GetKweSummary.header - print GetKweSummary(kwe) - -@lldb_command('showpthreadworkqueue') -def ShowPthreadWorkqueue(cmd_args=None): - """ - display information about a processes' pthread workqueue - Syntax: (lldb) showpthreadworkqueue - """ - - if not cmd_args: - raise ArgumentError("No arguments passed") - - proc = kern.GetValueFromAddress(cmd_args[0], "proc_t") - wq = Cast(proc.p_wqptr, "struct workqueue *"); - - print GetPthreadWorkqueueSummary.header - print GetPthreadWorkqueueSummary(wq) - - print GetPthreadWorkqueueDetail.header - print GetPthreadWorkqueueDetail(wq) - -def __lldb_init_module(debugger, internal_dict): - pass diff --git a/man/pthread_mutexattr.3 b/man/pthread_mutexattr.3 index 13e0861..756c407 100644 --- a/man/pthread_mutexattr.3 +++ b/man/pthread_mutexattr.3 @@ -81,6 +81,10 @@ .Fn pthread_mutexattr_settype "pthread_mutexattr_t *attr" "int type" .Ft int .Fn pthread_mutexattr_gettype "pthread_mutexattr_t *attr" "int *type" +.Ft int +.Fn pthread_mutexattr_setpolicy_np "pthread_mutexattr_t *attr" "int policy" +.Ft int +.Fn pthread_mutexattr_getpolicy_np "pthread_mutexattr_t *attr" "int *policy" .Sh DESCRIPTION Mutex attributes are used to specify parameters to .Fn pthread_mutex_init . 
@@ -164,6 +168,31 @@ This is the default mutex type for functions copy the type value of the attribute to the location pointed to by the second parameter. .Pp The +.Fn pthread_mutexattr_setpolicy_np +function sets the mutex +.Fa policy +value of the attribute. +Valid mutex policies are: +.Bl -tag -width "XXX" -offset 2n +.It Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP +The first-fit mutex policy allows acquisition of the mutex to occur in any +order. This policy is similar in operation to os_unfair_lock, new contending +acquirers may obtain ownership of the mutex ahead of existing waiters. +.It Dv PTHREAD_MUTEX_POLICY_FAIRSHARE_NP +The fairshare mutex policy guarantees that ownership of a contended mutex will +be granted to waiters on a strictly ordered first-in, first-out basis. That is, +a mutex holder that unlocks the mutex and then attempts to relock will wait +behind existing threads already waiting on the mutex before being granted +ownership again. +.El +.Pp +The +.Fn pthread_mutexattr_getpolicy_np +function copies the mutex +.Fa policy +value of the attribute to the location pointed to by the second parameter. +.Pp +The .Fn pthread_mutexattr_set* functions set the attribute that corresponds to each function name. .Pp @@ -174,6 +203,39 @@ to the location pointed to by the second function parameter. .Sh RETURN VALUES If successful, these functions return 0. Otherwise, an error number is returned to indicate the error. +.Sh ENVIRONMENT +The following environment variables change the behavior of the pthread mutex +implementation. +.Bl -tag -width "XXX" -offset 2n +.It Ev PTHREAD_MUTEX_DEFAULT_POLICY +Controls the process-wide policy used when initializing a pthread_mutex_t that +has not had a policy set via +.Fn pthread_mutexattr_setpolicy_np . +The valid values are mapped as: +.Pp +.Bl -tag -width "XXX" +.It Fa 1 +.Dv PTHREAD_MUTEX_POLICY_FAIRSHARE_NP +.It Fa 3 +.Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP +.El +.El +.Sh BACKWARDS COMPATIBILITY +Prior to macOS 10.14 (iOS and tvOS 12.0, watchOS 5.0) the only available +pthread mutex policy mode was +.Dv PTHREAD_MUTEX_POLICY_FAIRSHARE_NP . +macOS 10.14 (iOS and tvOS 12.0, watchOS 5.0) introduces +.Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP +and also makes this the default mode for mutexes initialized without a policy +attribute set. +.Pp +Attempting to use +.Fn pthread_mutexattr_setpolicy_np +to set the policy of a pthread_mutex_t to +.Dv PTHREAD_MUTEX_POLICY_FIRSTFIT_NP +on earlier releases will fail with +.Er EINVAL +and the mutex will continue to operate in fairshare mode. .Sh ERRORS The .Fn pthread_mutexattr_init @@ -252,6 +314,27 @@ function will fail if: Invalid value for .Fa attr . .El +.Pp +The +.Fn pthread_mutexattr_setpolicy_np +function will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +Invalid value for +.Fa attr . +.El +.Pp +The +.Fn pthread_mutexattr_getpolicy_np +function will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +The value specified either by +.Fa type +or +.Fa attr +is invalid. +.El .Sh SEE ALSO .Xr pthread_mutex_init 3 .Sh STANDARDS diff --git a/private/dependency_private.h b/private/dependency_private.h new file mode 100644 index 0000000..77d209f --- /dev/null +++ b/private/dependency_private.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2018 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). 
You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef __PTHREAD_DEPENDENCY_PRIVATE__ +#define __PTHREAD_DEPENDENCY_PRIVATE__ + +#include +#include +#include +#include + +__BEGIN_DECLS + +OS_ASSUME_NONNULL_BEGIN + +/*! + * @typedef pthread_dependency_t + * + * @abstract + * A pthread dependency is a one-time dependency between a thread producing + * a value and a waiter thread, expressed to the system in a way + * that priority inversion avoidance can be applied if necessary. + * + * @discussion + * These tokens are one-time use, and meant to be on the stack of the waiter + * thread. + * + * These tokens must be both fulfilled and waited on, exactly one of each. + */ +typedef struct pthread_dependency_s { + uint32_t __pdep_owner; + uint32_t __pdep_opaque1; + uint64_t __pdep_opaque2; +} pthread_dependency_t; + +/*! + * @typedef pthread_dependency_attr_t + * + * @abstract + * An opaque type to allow for future expansion of the pthread_dependency + * interface. + */ +typedef struct pthread_dependency_attr_s pthread_dependency_attr_t; + +#if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)) || defined(_DARWIN_C_SOURCE) || defined(__cplusplus) +/*! + * @macro PTHREAD_DEPENDENCY_INITIALIZER_NP + * + * @abstract + * Initialize a one-time dependency token. + * + * @param __pthread + * The thread that will be waited on for this dependency to be fulfilled. + * It is expected that this thread will call pthread_dependency_fulfill_np(). + */ +#define PTHREAD_DEPENDENCY_INITIALIZER_NP(__pthread) \ + { pthread_mach_thread_np(__pthread), 0, 0 } +#endif + +/*! + * @function pthread_dependency_init_np + * + * @abstract + * Initialize a dependency token. + * + * @param __dependency + * A pointer to a dependency token to initialize. + * + * @param __pthread + * The thread that will be waited on for this dependency to be fulfilled. + * It is expected that this thread will call pthread_dependency_fulfill_np(). + * + * @param __attrs + * This argument is reserved for future expansion purposes, and NULL should be + * passed. + */ +__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) +OS_NONNULL1 OS_NONNULL2 OS_NOTHROW +void pthread_dependency_init_np(pthread_dependency_t *__dependency, + pthread_t __pthread, pthread_dependency_attr_t *_Nullable __attrs); + +/*! + * @function pthread_dependency_fulfill_np + * + * @abstract + * Fulfill a dependency. + * + * @discussion + * Calling pthread_dependency_fulfill_np() with a token that hasn't been + * initialized yet, or calling pthread_dependency_fulfill_np() on the same + * dependency token more than once is undefined and will cause the process + * to be terminated. + * + * The thread that calls pthread_dependency_fulfill_np() must be the same + * as the pthread_t that was specified when initializing the token. Not doing so + * is undefined and will cause the process to be terminated. 
+ * + * @param __dependency + * A pointer to a dependency token that was previously initialized. + * + * @param __value + * An optional value that can be returned through the dependency token + * to the waiter. + */ +__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) +OS_NONNULL1 OS_NOTHROW +void pthread_dependency_fulfill_np(pthread_dependency_t *__dependency, + void * _Nullable __value); + +/*! + * @function pthread_dependency_wait_np + * + * @abstract + * Wait on a dependency. + * + * @discussion + * Calling pthread_dependency_wait_np() with a token that hasn't been + * initialized yet, or calling pthread_dependency_wait_np() on the same + * dependency token more than once is undefined and will cause the process + * to be terminated. + * + * If the dependency is not fulfilled yet when this function is called, priority + * inversion avoidance will be applied to the thread that was specified when + * initializing the token, to ensure that it can call + * pthread_dependency_fulfill_np() without causing a priority inversion for the + * thread calling pthread_dependency_wait_np(). + * + * @param __dependency + * A pointer to a dependency token that was previously initialized with + * PTHREAD_DEPENDENCY_INITIALIZER_NP() or pthread_dependency_init_np(). + * + * @returns + * The value that was passed to pthread_dependency_fulfill_np() as the `__value` + * argument. + */ +__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) +OS_NONNULL1 OS_NOTHROW +void *_Nullable pthread_dependency_wait_np(pthread_dependency_t *__dependency); + +OS_ASSUME_NONNULL_END + +__END_DECLS + +#endif //__PTHREAD_DEPENDENCY_PRIVATE__ diff --git a/private/private.h b/private/private.h index b98a350..b321442 100644 --- a/private/private.h +++ b/private/private.h @@ -93,6 +93,8 @@ int pthread_chdir_np(char *path); __API_AVAILABLE(macos(10.12), ios(10.0), tvos(10.0), watchos(3.0)) int pthread_fchdir_np(int fd); +__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) +int pthread_attr_setcpupercent_np(pthread_attr_t * __restrict, int, unsigned long); #ifdef _os_tsd_get_base @@ -107,17 +109,17 @@ __header_always_inline uint64_t _pthread_threadid_self_np_direct(void) { #ifndef __i386__ - if (_pthread_has_direct_tsd()) { + if (_pthread_has_direct_tsd()) { #ifdef OS_GS_RELATIVE - return *(uint64_t OS_GS_RELATIVE *)(_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET); + return *(uint64_t OS_GS_RELATIVE *)(_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET); #else - return *(uint64_t*)((char *)_os_tsd_get_base() + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET); + return *(uint64_t*)((char *)_os_tsd_get_base() + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET); #endif - } + } #endif - uint64_t threadid = 0; - pthread_threadid_np(NULL, &threadid); - return threadid; + uint64_t threadid = 0; + pthread_threadid_np(NULL, &threadid); + return threadid; } #endif // _os_tsd_get_base diff --git a/private/qos_private.h b/private/qos_private.h index 50f273a..6068a82 100644 --- a/private/qos_private.h +++ b/private/qos_private.h @@ -25,6 +25,7 @@ #define _QOS_PRIVATE_H #include +#include #include /* qos_class_t */ #include @@ -33,48 +34,6 @@ #include #endif -// pthread_priority_t is an on opaque integer that is guaranteed to be ordered such that -// combations of QoS classes and relative priorities are ordered numerically, according to -// their combined priority. 
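The intended producer/waiter pairing for the pthread_dependency SPI documented in dependency_private.h above is easiest to see end to end. The following is a minimal sketch, not part of the patch: it assumes the header installs as <pthread/dependency_private.h>, and the gate mutex and the value passed through the token are purely illustrative.

#include <pthread.h>
#include <pthread/dependency_private.h>   /* assumed install path of the SPI header */
#include <stdio.h>

static pthread_dependency_t dep;                         /* one-time token */
static pthread_mutex_t gate = PTHREAD_MUTEX_INITIALIZER;

static void *
producer(void *arg)
{
	(void)arg;
	/* The gate guarantees the waiter initialized the token before we touch it. */
	pthread_mutex_lock(&gate);
	pthread_mutex_unlock(&gate);
	/* Fulfill exactly once, from the thread that was named in the token. */
	pthread_dependency_fulfill_np(&dep, (void *)42);
	return NULL;
}

int
main(void)
{
	pthread_t worker;

	pthread_mutex_lock(&gate);
	pthread_create(&worker, NULL, producer, NULL);
	/* Name the fulfilling thread so the kernel can resolve a priority
	 * inversion against it while we wait. */
	pthread_dependency_init_np(&dep, worker, NULL);
	pthread_mutex_unlock(&gate);

	void *value = pthread_dependency_wait_np(&dep);   /* returns (void *)42 */
	printf("dependency fulfilled with %p\n", value);

	pthread_join(worker, NULL);
	return 0;
}

The gate exists only to satisfy the documented rule that the token must be initialized before it is fulfilled; in real use the hand-off of the token to the producer provides that ordering.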
-typedef unsigned long pthread_priority_t; - -// masks for splitting the handling the contents of a pthread_priority_t, the mapping from -// qos_class_t to the class bits, however, is intentionally not exposed. -#define _PTHREAD_PRIORITY_FLAGS_MASK 0xff000000 -#define _PTHREAD_PRIORITY_FLAGS_SHIFT (24ull) -#define _PTHREAD_PRIORITY_ENCODING_MASK 0x00a00000 -#define _PTHREAD_PRIORITY_ENCODING_SHIFT (22ull) -#define _PTHREAD_PRIORITY_ENCODING_V0 0x00000000 -#define _PTHREAD_PRIORITY_ENCODING_V1 0x00400000 /* unused */ -#define _PTHREAD_PRIORITY_ENCODING_V2 0x00800000 /* unused */ -#define _PTHREAD_PRIORITY_ENCODING_V3 0x00a00000 /* unused */ -#define _PTHREAD_PRIORITY_QOS_CLASS_MASK 0x003fff00 -#define _PTHREAD_PRIORITY_QOS_CLASS_SHIFT (8ull) -#define _PTHREAD_PRIORITY_PRIORITY_MASK 0x000000ff -#define _PTHREAD_PRIORITY_PRIORITY_SHIFT (0) - -#define _PTHREAD_PRIORITY_OVERCOMMIT_FLAG 0x80000000 -#define _PTHREAD_PRIORITY_INHERIT_FLAG 0x40000000 -#define _PTHREAD_PRIORITY_ROOTQUEUE_FLAG 0x20000000 -// Used to indicate to the pthread kext that the provided event manager thread -// priority is actually a scheduling priority not a QoS. We can have ROOTQUEUE_FLAG -// perform double duty because it's never provided to the kernel. -#define _PTHREAD_PRIORITY_SCHED_PRI_FLAG 0x20000000 -#define _PTHREAD_PRIORITY_SCHED_PRI_MASK 0x0000ffff -#define _PTHREAD_PRIORITY_ENFORCE_FLAG 0x10000000 -#define _PTHREAD_PRIORITY_OVERRIDE_FLAG 0x08000000 - -// libdispatch defines the following, so it's not safe to use for anything we -// expect to be passed in from userspace -#define _PTHREAD_PRIORITY_DEFAULTQUEUE_FLAG 0x04000000 - -// The event manager flag indicates that this thread/request is for a event -// manager thread. There can only ever be one event manager thread at a time and -// it is brought up at the highest of all event manager priorities passed to the -// kext. -#define _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG 0x02000000 -#define _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG 0x01000000 - // redeffed here to avoid leaving __QOS_ENUM defined in the public header #define __QOS_ENUM(name, type, ...) 
enum { __VA_ARGS__ }; typedef type name##_t #define __QOS_AVAILABLE_10_10 diff --git a/private/tsd_private.h b/private/tsd_private.h index f91c1f6..f9260fb 100644 --- a/private/tsd_private.h +++ b/private/tsd_private.h @@ -68,6 +68,10 @@ #define __TSD_RETURN_TO_KERNEL 5 #endif +#ifndef __TSD_PTR_MUNGE +#define __TSD_PTR_MUNGE 7 +#endif + #ifndef __TSD_MACH_SPECIAL_REPLY #define __TSD_MACH_SPECIAL_REPLY 8 #endif @@ -81,6 +85,7 @@ #define _PTHREAD_TSD_SLOT_MACH_THREAD_SELF __TSD_MACH_THREAD_SELF #define _PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS __TSD_THREAD_QOS_CLASS #define _PTHREAD_TSD_SLOT_RETURN_TO_KERNEL __TSD_RETURN_TO_KERNEL +#define _PTHREAD_TSD_SLOT_PTR_MUNGE __TSD_PTR_MUNGE #define _PTHREAD_TSD_SLOT_MACH_SPECIAL_REPLY __TSD_MACH_SPECIAL_REPLY //#define _PTHREAD_TSD_SLOT_SEMAPHORE_CACHE __TSD_SEMAPHORE_CACHE diff --git a/private/workqueue_private.h b/private/workqueue_private.h index 0b0a001..9cd0e95 100644 --- a/private/workqueue_private.h +++ b/private/workqueue_private.h @@ -179,6 +179,14 @@ __API_AVAILABLE(macos(10.10.2)) int _pthread_workqueue_asynchronous_override_reset_all_self(void); +__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) +int +_pthread_workloop_create(uint64_t workloop_id, uint64_t options, pthread_attr_t *attr); + +__API_AVAILABLE(macos(10.14), ios(12.0), tvos(12.0), watchos(5.0)) +int +_pthread_workloop_destroy(uint64_t workloop_id); + __END_DECLS #endif // __PTHREAD_WORKQUEUE_H__ diff --git a/pthread/introspection.h b/pthread/introspection.h index 1829208..10b719a 100644 --- a/pthread/introspection.h +++ b/pthread/introspection.h @@ -64,18 +64,40 @@ typedef void (*pthread_introspection_hook_t)(unsigned int event, /*! * @enum pthread_introspection_event_t + * Events sent by libpthread about threads lifetimes. * - * @constant PTHREAD_INTROSPECTION_THREAD_CREATE - * pthread_t was created. + * @const PTHREAD_INTROSPECTION_THREAD_CREATE + * The specified pthread_t was created, and there will be a paired + * PTHREAD_INTROSPECTION_THREAD_DESTROY event. However, there may not be + * a START/TERMINATE pair of events for this pthread_t. * - * @constant PTHREAD_INTROSPECTION_THREAD_START - * Thread has started and stack was allocated. + * Starting with macOS 10.14, and iOS 12, this event is always sent before + * PTHREAD_INTROSPECTION_THREAD_START is sent. This event is however not sent + * for the main thread. * - * @constant PTHREAD_INTROSPECTION_THREAD_TERMINATE - * Thread is about to be terminated and stack will be deallocated. + * This event may not be sent from the context of the passed in pthread_t. * - * @constant PTHREAD_INTROSPECTION_THREAD_DESTROY - * pthread_t is about to be destroyed. + * Note that all properties of this thread may not be functional yet, and it is + * not permitted to call functions on this thread past observing its address. + * + * @const PTHREAD_INTROSPECTION_THREAD_START + * Thread has started and its stack was allocated. There will be a matching + * PTHREAD_INTROSPECTION_THREAD_TERMINATE event. + * + * This event is always sent from the context of the passed in pthread_t. + * + * @const PTHREAD_INTROSPECTION_THREAD_TERMINATE + * Thread is about to be terminated and stack will be deallocated. This always + * matches a PTHREAD_INTROSPECTION_THREAD_START event. + * + * This event is always sent from the context of the passed in pthread_t. + * + * @const PTHREAD_INTROSPECTION_THREAD_DESTROY + * pthread_t is about to be destroyed. 
This always matches + * a PTHREAD_INTROSPECTION_THREAD_CREATE event, but there may not have been + * a START/TERMINATE pair of events for this pthread_t. + * + * This event may not be sent from the context of the passed in pthread_t. */ enum { PTHREAD_INTROSPECTION_THREAD_CREATE = 1, diff --git a/pthread/pthread.h b/pthread/pthread.h index 0e2ecb7..f5fdff6 100644 --- a/pthread/pthread.h +++ b/pthread/pthread.h @@ -171,6 +171,12 @@ __BEGIN_DECLS #define PTHREAD_MUTEX_RECURSIVE 2 #define PTHREAD_MUTEX_DEFAULT PTHREAD_MUTEX_NORMAL +/* + * Mutex policy attributes + */ +#define PTHREAD_MUTEX_POLICY_FAIRSHARE_NP 1 +#define PTHREAD_MUTEX_POLICY_FIRSTFIT_NP 3 + /* * RWLock variables */ @@ -405,6 +411,10 @@ __API_AVAILABLE(macos(10.4), ios(2.0)) int pthread_mutexattr_gettype(const pthread_mutexattr_t * __restrict, int * __restrict); +__API_AVAILABLE(macos(10.13.4), ios(11.3), watchos(4.3), tvos(11.3)) +int pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t * __restrict, + int * __restrict); + __API_AVAILABLE(macos(10.4), ios(2.0)) int pthread_mutexattr_init(pthread_mutexattr_t *); @@ -420,6 +430,9 @@ int pthread_mutexattr_setpshared(pthread_mutexattr_t *, int); __API_AVAILABLE(macos(10.4), ios(2.0)) int pthread_mutexattr_settype(pthread_mutexattr_t *, int); +__API_AVAILABLE(macos(10.7), ios(5.0)) +int pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *, int); + __SWIFT_UNAVAILABLE_MSG("Use lazily initialized globals instead") __API_AVAILABLE(macos(10.4), ios(2.0)) int pthread_once(pthread_once_t *, void (* _Nonnull)(void)); diff --git a/pthread/pthread_spis.h b/pthread/pthread_spis.h index a0ba754..91fb641 100644 --- a/pthread/pthread_spis.h +++ b/pthread/pthread_spis.h @@ -63,19 +63,13 @@ __BEGIN_DECLS #if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)) || defined(_DARWIN_C_SOURCE) /* firstfit */ #define PTHREAD_FIRSTFIT_MUTEX_INITIALIZER {_PTHREAD_FIRSTFIT_MUTEX_SIG_init, {0}} + /* * Mutex attributes */ -#define _PTHREAD_MUTEX_POLICY_NONE 0 -#define _PTHREAD_MUTEX_POLICY_FAIRSHARE 1 -#define _PTHREAD_MUTEX_POLICY_FIRSTFIT 2 - -/* manipulate the mutex policy attributes */ -__API_AVAILABLE(macos(10.7), ios(5.0)) -int pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *, int ); - -__API_AVAILABLE(macos(10.13.4), ios(11.3)) -int pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t *, int * ); +#define _PTHREAD_MUTEX_POLICY_NONE PTHREAD_MUTEX_POLICY_NONE +#define _PTHREAD_MUTEX_POLICY_FAIRSHARE PTHREAD_MUTEX_POLICY_FAIRSHARE_NP +#define _PTHREAD_MUTEX_POLICY_FIRSTFIT PTHREAD_MUTEX_POLICY_FIRSTFIT_NP #endif /* (!_POSIX_C_SOURCE && !_XOPEN_SOURCE) || _DARWIN_C_SOURCE */ diff --git a/pthread/stack_np.h b/pthread/stack_np.h new file mode 100644 index 0000000..9b5f513 --- /dev/null +++ b/pthread/stack_np.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2018 Apple Inc. All rights reserved. + * + * @APPLE_APACHE_LICENSE_HEADER_START@ + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * @APPLE_APACHE_LICENSE_HEADER_END@ + */ + +#ifndef __PTHREAD_STACK_NP__ +#define __PTHREAD_STACK_NP__ + +#include +#include +#include +#include +#include +#include + +OS_ASSUME_NONNULL_BEGIN + +/*! @header + * Low-level API to introspect thread stacks. + */ + +__BEGIN_DECLS + +/*! + * @function pthread_stack_frame_decode_np + * + * @abstract + * Decodes the return address and the next stack frame address + * from the given stack frame address. + * + * @discussion + * Validation of the frame address is not performed by this function. + * The caller is responsible for making sure the frame address is valid, + * for example using pthread_get_stackaddr_np() and pthread_get_stacksize_np(). + * + * @param frame_addr + * A valid stack frame address such as __builtin_frame_address(0) or the return + * value of a previous call to pthread_stack_frame_decode_np(). + * + * @param return_addr + * An optional out paramter that will be filled with the return address stored + * at the specified stack frame. + * + * @returns + * This returns the next frame address stored at the specified stack frame. + */ +__OSX_AVAILABLE(10.14) __IOS_AVAILABLE(12.0) +__TVOS_AVAILABLE(12.0) __WATCHOS_AVAILABLE(5.0) +uintptr_t +pthread_stack_frame_decode_np(uintptr_t frame_addr, + uintptr_t *_Nullable return_addr); + +__END_DECLS + +OS_ASSUME_NONNULL_END + +#endif // __PTHREAD_STACK_NP__ diff --git a/src/internal.h b/src/internal.h index 9f2e127..c9c16c7 100644 --- a/src/internal.h +++ b/src/internal.h @@ -70,6 +70,8 @@ typedef struct _pthread_attr_t pthread_attr_t; #include #include #include +#include +#include #define __OS_EXPOSE_INTERNALS__ 1 #include @@ -125,19 +127,24 @@ typedef os_unfair_lock _pthread_lock; #define _PTHREAD_UNLOCK(lock) os_unfair_lock_unlock_inline(&(lock)) #define _PTHREAD_UNLOCK_FROM_MACH_THREAD(lock) os_unfair_lock_unlock_inline_no_tsd_4libpthread(&(lock)) +#define _PTHREAD_POLICY_IS_FIXEDPRI(x) ((x) == SCHED_RR || (x) == SCHED_FIFO) + +extern int __is_threaded; +extern int __unix_conforming; + // List of all pthreads in the process. TAILQ_HEAD(__pthread_list, _pthread); -extern struct __pthread_list __pthread_head; +PTHREAD_NOEXPORT extern struct __pthread_list __pthread_head; // Lock protects access to above list. -extern _pthread_lock _pthread_list_lock; +PTHREAD_NOEXPORT extern _pthread_lock _pthread_list_lock; -extern int __is_threaded; +PTHREAD_NOEXPORT extern uint32_t _main_qos; #if PTHREAD_DEBUG_LOG #include -extern int _pthread_debuglog; -extern uint64_t _pthread_debugstart; +PTHREAD_NOEXPORT extern int _pthread_debuglog; +PTHREAD_NOEXPORT extern uint64_t _pthread_debugstart; #endif /* @@ -153,6 +160,8 @@ extern uint64_t _pthread_debugstart; #define _INTERNAL_POSIX_THREAD_KEYS_END 768 #endif +#define PTHREAD_T_OFFSET 0 + #define MAXTHREADNAMESIZE 64 #define _PTHREAD_T typedef struct _pthread { @@ -165,52 +174,56 @@ typedef struct _pthread { // // SPI - These fields are private. 
// - // these fields are globally protected by _pthread_list_lock: - uint32_t childrun:1, - parentcheck:1, - childexit:1, - pad3:29; - - _pthread_lock lock; // protect access to everything below - uint32_t detached:8, - inherit:8, - policy:8, - kernalloc:1, - schedset:1, - wqthread:1, - wqkillset:1, - pad:4; - -#if defined(__LP64__) - uint32_t pad0; -#endif - - void *(*fun)(void*); // thread start routine - void *arg; // thread start routine argument - void *exit_value; // thread exit value storage - - semaphore_t joiner_notify; // pthread_join notification - - int max_tsd_key; - int cancel_state; // whether the thread can be cancelled - int cancel_error; - int err_no; // thread-local errno + // + // Fields protected by _pthread_list_lock + // - struct _pthread *joiner; + TAILQ_ENTRY(_pthread) tl_plist; // global thread list [aligned] + struct pthread_join_context_s *tl_join_ctx; + void *tl_exit_value; + uint32_t tl_policy:8, + tl_joinable:1, + tl_joiner_cleans_up:1, + tl_has_custom_stack:1, + __tl_pad:21; + // MACH_PORT_NULL if no joiner + // tsd[_PTHREAD_TSD_SLOT_MACH_THREAD_SELF] when has a joiner + // MACH_PORT_DEAD if the thread exited + uint32_t tl_exit_gate; + struct sched_param tl_param; - struct sched_param param; // [aligned] + // + // Fields protected by pthread_t::lock + // - TAILQ_ENTRY(_pthread) plist; // global thread list [aligned] + _pthread_lock lock; + uint16_t max_tsd_key; + uint16_t inherit:8, + kernalloc:1, + schedset:1, + wqthread:1, + wqkillset:1, + wqoutsideqos:1, + __flags_pad:3; char pthread_name[MAXTHREADNAMESIZE]; // includes NUL [aligned] - void *stackaddr; // base of the stack (page aligned) - size_t stacksize; // size of stack (page multiple and >= PTHREAD_STACK_MIN) - - void* freeaddr; // stack/thread allocation base address - size_t freesize; // stack/thread allocation size - size_t guardsize; // guard page size in bytes + void *(*fun)(void *); // thread start routine + void *wq_kqid_ptr; // wqthreads (workloop) + void *arg; // thread start routine argument + int wq_nevents; // wqthreads (workloop / kevent) + uint16_t wq_retop; // wqthreads + uint8_t cancel_state; // whether the thread can be canceled [atomic] + uint8_t canceled; // 4597450 set if conformant cancelation happened + errno_t cancel_error; + errno_t err_no; // thread-local errno + + void *stackaddr; // base of the stack (page aligned) + void *stackbottom; // stackaddr - stacksize + void *freeaddr; // stack/thread allocation base address + size_t freesize; // stack/thread allocation size + size_t guardsize; // guard page size in bytes // tsd-base relative accessed elements __attribute__((aligned(8))) @@ -228,39 +241,39 @@ typedef struct _pthread { void *tsd[_EXTERNAL_POSIX_THREAD_KEYS_MAX + _INTERNAL_POSIX_THREAD_KEYS_MAX]; } *pthread_t; - +#define _PTHREAD_ATTR_REFILLMS_MAX ((2<<24) - 1) struct _pthread_attr_t { - long sig; - _pthread_lock lock; - uint32_t detached:8, + long sig; + size_t guardsize; // size in bytes of stack overflow guard area + void *stackaddr; // stack base; vm_page_size aligned + size_t stacksize; // stack size; multiple of vm_page_size and >= PTHREAD_STACK_MIN + union { + struct sched_param param; // [aligned] + unsigned long qosclass; // pthread_priority_t + }; + uint32_t + detached:8, inherit:8, policy:8, - fastpath:1, schedset:1, qosset:1, - unused:5; - struct sched_param param; // [aligned] - void *stackaddr; // stack base; vm_page_size aligned - size_t stacksize; // stack size; multiple of vm_page_size and >= PTHREAD_STACK_MIN - size_t guardsize; // size in bytes of 
stack overflow guard area - unsigned long qosclass; + policyset:1, + cpupercentset:1, + defaultguardpage:1, + unused:3; + uint32_t + cpupercent:8, + refillms:24; #if defined(__LP64__) - uint32_t _reserved[2]; + uint32_t _reserved[4]; #else - uint32_t _reserved[1]; + uint32_t _reserved[2]; #endif }; /* * Mutex attributes */ -#define _PTHREAD_MUTEX_POLICY_NONE 0 -#define _PTHREAD_MUTEX_POLICY_FAIRSHARE 1 -#define _PTHREAD_MUTEX_POLICY_FIRSTFIT 2 -#define _PTHREAD_MUTEX_POLICY_REALTIME 3 -#define _PTHREAD_MUTEX_POLICY_ADAPTIVE 4 -#define _PTHREAD_MUTEX_POLICY_PRIPROTECT 5 -#define _PTHREAD_MUTEX_POLICY_PRIINHERIT 6 #define _PTHREAD_MUTEXATTR_T typedef struct { @@ -269,7 +282,7 @@ typedef struct { uint32_t protocol:2, type:2, pshared:2, - policy:3, + opt:3, unused:23; } pthread_mutexattr_t; @@ -285,6 +298,21 @@ struct _pthread_mutex_options { unused:2, lock_count:16; }; +// +#define _PTHREAD_MUTEX_POLICY_LAST (PTHREAD_MUTEX_POLICY_FIRSTFIT_NP + 1) +#define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE 1 +#define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT 2 +#define _PTHREAD_MTX_OPT_POLICY_DEFAULT _PTHREAD_MTX_OPT_POLICY_FIRSTFIT +// The following _pthread_mutex_options defintions exist in synch_internal.h +// such that the kernel extension can test for flags. They must be kept in +// sync with the bit values in the struct above. +// _PTHREAD_MTX_OPT_PSHARED 0x010 +// _PTHREAD_MTX_OPT_NOTIFY 0x1000 +// _PTHREAD_MTX_OPT_MUTEX 0x2000 + +// The fixed mask is used to mask out portions of the mutex options that +// change on a regular basis (notify, lock_count). +#define _PTHREAD_MTX_OPT_FIXED_MASK 0x27ff typedef struct { long sig; @@ -429,12 +457,6 @@ _pthread_selfid_direct(void) #define _PTHREAD_KERN_MUTEX_SIG 0x34567812 /* */ #define _PTHREAD_KERN_RWLOCK_SIG 0x56781234 /* */ -#define _PTHREAD_CREATE_PARENT 4 -#define _PTHREAD_EXITED 8 -// 4597450: begin -#define _PTHREAD_WASCANCEL 0x10 -// 4597450: end - #if defined(DEBUG) #define _PTHREAD_MUTEX_OWNER_SELF pthread_self() #else @@ -454,11 +476,6 @@ extern boolean_t swtch_pri(int); /* Prototypes. */ /* Internal globals. */ -PTHREAD_NOEXPORT extern int __pthread_supported_features; - -/* Functions defined in machine-dependent files. 
*/ -PTHREAD_NOEXPORT void _pthread_setup(pthread_t th, void (*f)(pthread_t), void *sp, int suspended, int needresume); - PTHREAD_NOEXPORT void _pthread_tsd_cleanup(pthread_t self); PTHREAD_NOEXPORT int _pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t * flagp, uint32_t ** pmtxp, uint32_t * mgenp, uint32_t * ugenp); @@ -468,8 +485,8 @@ PTHREAD_NOEXPORT void* malloc(size_t); PTHREAD_NOEXPORT void free(void*); /* syscall interfaces */ -extern uint32_t __psynch_mutexwait(pthread_mutex_t * mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags); -extern uint32_t __psynch_mutexdrop(pthread_mutex_t * mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags); +extern uint32_t __psynch_mutexwait(_pthread_mutex * mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags); +extern uint32_t __psynch_mutexdrop(_pthread_mutex * mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags); extern uint32_t __psynch_cvbroad(pthread_cond_t * cv, uint64_t cvlsgen, uint64_t cvudgen, uint32_t flags, pthread_mutex_t * mutex, uint64_t mugen, uint64_t tid); extern uint32_t __psynch_cvsignal(pthread_cond_t * cv, uint64_t cvlsgen, uint32_t cvugen, int thread_port, pthread_mutex_t * mutex, uint64_t mugen, uint64_t tid, uint32_t flags); @@ -489,7 +506,9 @@ PTHREAD_EXTERN int __proc_info(int callnum, int pid, int flavor, uint64_t arg, void * buffer, int buffersize); -PTHREAD_NOEXPORT int _pthread_join_cleanup(pthread_t thread, void ** value_ptr, int conforming); +PTHREAD_NOEXPORT +void +_pthread_deallocate(pthread_t t, bool from_mach_thread); PTHREAD_NORETURN PTHREAD_NOEXPORT void @@ -499,6 +518,10 @@ PTHREAD_NORETURN PTHREAD_NOEXPORT void __pthread_abort_reason(const char *fmt, ...) __printflike(1,2); +PTHREAD_NOEXPORT +thread_qos_t +_pthread_qos_class_to_thread_qos(qos_class_t qos); + PTHREAD_NOEXPORT void _pthread_set_main_qos(pthread_priority_t qos); @@ -515,7 +538,7 @@ PTHREAD_EXPORT void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags); -PTHREAD_EXPORT +PTHREAD_NORETURN PTHREAD_EXPORT void _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int flags, int nkevents); @@ -531,9 +554,13 @@ PTHREAD_NOEXPORT_VARIANT void _pthread_clear_qos_tsd(mach_port_t thread_port); +#define PTHREAD_CONFORM_DARWIN_LEGACY 0 +#define PTHREAD_CONFORM_UNIX03_NOCANCEL 1 +#define PTHREAD_CONFORM_UNIX03_CANCELABLE 2 + PTHREAD_NOEXPORT_VARIANT void -_pthread_testcancel(pthread_t thread, int isconforming); +_pthread_testcancel(int conforming); PTHREAD_EXPORT void @@ -545,11 +572,11 @@ _pthread_markcancel_if_canceled(pthread_t thread, mach_port_t kport); PTHREAD_NOEXPORT void -_pthread_setcancelstate_exit(pthread_t self, void *value_ptr, int conforming); +_pthread_setcancelstate_exit(pthread_t self, void *value_ptr); PTHREAD_NOEXPORT -void * -_pthread_get_exit_value(pthread_t t, int conforming); +semaphore_t +_pthread_joiner_prepost_wake(pthread_t thread); PTHREAD_ALWAYS_INLINE static inline mach_port_t @@ -647,60 +674,54 @@ _pthread_rwlock_check_signature_init(_pthread_rwlock *rwlock) return (rwlock->sig == _PTHREAD_RWLOCK_SIG_init); } -/* ALWAYS called with list lock and return with list lock */ +/* + * ALWAYS called without list lock and return with list lock held on success + * + * This weird calling convention exists because this function will sometimes + * drop the lock, and it's best callers don't have to remember this. 
+ */ PTHREAD_ALWAYS_INLINE static inline bool -_pthread_is_valid_locked(pthread_t thread) +_pthread_validate_thread_and_list_lock(pthread_t thread) { pthread_t p; + if (thread == NULL) return false; loop: - TAILQ_FOREACH(p, &__pthread_head, plist) { - if (p == thread) { - int state = os_atomic_load(&p->cancel_state, relaxed); - if (state & _PTHREAD_CANCEL_INITIALIZED) { - return true; + _PTHREAD_LOCK(_pthread_list_lock); + TAILQ_FOREACH(p, &__pthread_head, tl_plist) { + if (p != thread) continue; + int state = os_atomic_load(&p->cancel_state, relaxed); + if (os_likely(state & _PTHREAD_CANCEL_INITIALIZED)) { + if (os_unlikely(p->sig != _PTHREAD_SIG)) { + PTHREAD_CLIENT_CRASH(0, "pthread_t was corrupted"); } - _PTHREAD_UNLOCK(_pthread_list_lock); - thread_switch(_pthread_kernel_thread(p), - SWITCH_OPTION_OSLOCK_DEPRESS, 1); - _PTHREAD_LOCK(_pthread_list_lock); - goto loop; + return true; } + _PTHREAD_UNLOCK(_pthread_list_lock); + thread_switch(_pthread_kernel_thread(p), + SWITCH_OPTION_OSLOCK_DEPRESS, 1); + goto loop; } + _PTHREAD_UNLOCK(_pthread_list_lock); return false; } -#define PTHREAD_IS_VALID_LOCK_THREAD 0x1 - PTHREAD_ALWAYS_INLINE static inline bool -_pthread_is_valid(pthread_t thread, int flags, mach_port_t *portp) +_pthread_is_valid(pthread_t thread, mach_port_t *portp) { mach_port_t kport = MACH_PORT_NULL; bool valid; - if (thread == NULL) { - return false; - } - if (thread == pthread_self()) { valid = true; kport = _pthread_kernel_thread(thread); - if (flags & PTHREAD_IS_VALID_LOCK_THREAD) { - _PTHREAD_LOCK(thread->lock); - } + } else if (!_pthread_validate_thread_and_list_lock(thread)) { + valid = false; } else { - _PTHREAD_LOCK(_pthread_list_lock); - if (_pthread_is_valid_locked(thread)) { - kport = _pthread_kernel_thread(thread); - valid = true; - if (flags & PTHREAD_IS_VALID_LOCK_THREAD) { - _PTHREAD_LOCK(thread->lock); - } - } else { - valid = false; - } + kport = _pthread_kernel_thread(thread); + valid = true; _PTHREAD_UNLOCK(_pthread_list_lock); } diff --git a/src/offsets.h b/src/offsets.h new file mode 100644 index 0000000..0e20385 --- /dev/null +++ b/src/offsets.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _POSIX_PTHREAD_OFFSETS_H +#define _POSIX_PTHREAD_OFFSETS_H + +#ifndef __ASSEMBLER__ +#define check_backward_offset(field, value) \ + _Static_assert(offsetof(struct _pthread, tsd) + value == \ + offsetof(struct _pthread, field), #value " is correct") +#define check_forward_offset(field, value) \ + _Static_assert(offsetof(struct _pthread, field) == value, \ + #value " is correct") +#else +#define check_backward_offset(field, value) +#define check_forward_offset(field, value) +#endif // __ASSEMBLER__ + +#if defined(__i386__) +#define _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET 140 +#define _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET 144 +#elif __LP64__ +#define _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET -48 +#define _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET -40 +#else +#define _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET -36 +#define _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET -32 +#endif + +#if defined(__i386__) +check_forward_offset(stackaddr, _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET); +check_forward_offset(stackbottom, _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET); +#else +check_backward_offset(stackaddr, _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET); +check_backward_offset(stackbottom, _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET); +#endif + +#endif /* _POSIX_PTHREAD_OFFSETS_H */ diff --git a/src/pthread.c b/src/pthread.c index 8e63bd3..c9c1b9b 100644 --- a/src/pthread.c +++ b/src/pthread.c @@ -56,6 +56,8 @@ #include "introspection_private.h" #include "qos_private.h" #include "tsd_private.h" +#include "pthread/stack_np.h" +#include "offsets.h" // included to validate the offsets at build time #include #include @@ -63,10 +65,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #define __APPLE_API_PRIVATE @@ -77,40 +81,46 @@ #include extern int __sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); + void *newp, size_t newlen); extern void __exit(int) __attribute__((noreturn)); extern int __pthread_kill(mach_port_t, int); -extern struct _pthread _thread; -extern int default_priority; +extern void _pthread_joiner_wake(pthread_t thread); +#if !VARIANT_DYLD +PTHREAD_NOEXPORT extern struct _pthread *_main_thread_ptr; +#define main_thread() (_main_thread_ptr) +#endif // VARIANT_DYLD -// -// Global variables -// +// Default stack size is 512KB; independent of the main thread's stack size. +#define DEFAULT_STACK_SIZE (size_t)(512 * 1024) -static void (*exitf)(int) = __exit; -PTHREAD_NOEXPORT void* (*_pthread_malloc)(size_t) = NULL; -PTHREAD_NOEXPORT void (*_pthread_free)(void *) = NULL; -#if PTHREAD_DEBUG_LOG -#include -int _pthread_debuglog; -uint64_t _pthread_debugstart; -#endif - -// This global should be used (carefully) by anyone needing to know if a -// pthread (other than the main thread) has been created. -int __is_threaded = 0; +// +// Global constants +// -int __unix_conforming = 0; +/* + * The pthread may be offset into a page. In that event, by contract + * with the kernel, the allocation will extend PTHREAD_SIZE from the + * start of the next page. There's also one page worth of allocation + * below stacksize for the guard page. 
+ */ +#define PTHREAD_SIZE ((size_t)mach_vm_round_page(sizeof(struct _pthread))) +#define PTHREAD_ALLOCADDR(stackaddr, stacksize) ((stackaddr - stacksize) - vm_page_size) +#define PTHREAD_ALLOCSIZE(stackaddr, stacksize) ((round_page((uintptr_t)stackaddr) + PTHREAD_SIZE) - (uintptr_t)PTHREAD_ALLOCADDR(stackaddr, stacksize)) -// _pthread_list_lock protects _pthread_count, access to the __pthread_head -// list, and the parentcheck, childrun and childexit flags of the pthread -// structure. Externally imported by pthread_cancelable.c. -PTHREAD_NOEXPORT _pthread_lock _pthread_list_lock = _PTHREAD_LOCK_INITIALIZER; -PTHREAD_NOEXPORT struct __pthread_list __pthread_head = TAILQ_HEAD_INITIALIZER(__pthread_head); -static int _pthread_count = 1; +static const pthread_attr_t _pthread_attr_default = { + .sig = _PTHREAD_ATTR_SIG, + .stacksize = 0, + .detached = PTHREAD_CREATE_JOINABLE, + .inherit = _PTHREAD_DEFAULT_INHERITSCHED, + .policy = _PTHREAD_DEFAULT_POLICY, + .defaultguardpage = true, + // compile time constant for _pthread_default_priority(0) + .qosclass = (1U << (THREAD_QOS_LEGACY - 1 + _PTHREAD_PRIORITY_QOS_CLASS_SHIFT)) | + ((uint8_t)-1 & _PTHREAD_PRIORITY_PRIORITY_MASK), +}; #if PTHREAD_LAYOUT_SPI @@ -124,315 +134,154 @@ const struct pthread_layout_offsets_s pthread_layout_offsets = { #endif // PTHREAD_LAYOUT_SPI // -// Static variables +// Global exported variables // -// Mach message notification that a thread needs to be recycled. -typedef struct _pthread_reap_msg_t { - mach_msg_header_t header; - pthread_t thread; - mach_msg_trailer_t trailer; -} pthread_reap_msg_t; +// This global should be used (carefully) by anyone needing to know if a +// pthread (other than the main thread) has been created. +int __is_threaded = 0; +int __unix_conforming = 0; -/* - * The pthread may be offset into a page. In that event, by contract - * with the kernel, the allocation will extend PTHREAD_SIZE from the - * start of the next page. There's also one page worth of allocation - * below stacksize for the guard page. - */ -#define PTHREAD_SIZE ((size_t)mach_vm_round_page(sizeof(struct _pthread))) -#define PTHREAD_ALLOCADDR(stackaddr, stacksize) ((stackaddr - stacksize) - vm_page_size) -#define PTHREAD_ALLOCSIZE(stackaddr, stacksize) ((round_page((uintptr_t)stackaddr) + PTHREAD_SIZE) - (uintptr_t)PTHREAD_ALLOCADDR(stackaddr, stacksize)) +// +// Global internal variables +// -static pthread_attr_t _pthread_attr_default = { }; +// _pthread_list_lock protects _pthread_count, access to the __pthread_head +// list. Externally imported by pthread_cancelable.c. 
+struct __pthread_list __pthread_head = TAILQ_HEAD_INITIALIZER(__pthread_head); +_pthread_lock _pthread_list_lock = _PTHREAD_LOCK_INITIALIZER; + +uint32_t _main_qos; +#if VARIANT_DYLD // The main thread's pthread_t -PTHREAD_NOEXPORT struct _pthread _thread __attribute__((aligned(64))) = { }; +struct _pthread _main_thread __attribute__((aligned(64))) = { }; +#define main_thread() (&_main_thread) +#else // VARIANT_DYLD +struct _pthread *_main_thread_ptr; +#endif // VARIANT_DYLD -PTHREAD_NOEXPORT int default_priority; -static int max_priority; -static int min_priority; +#if PTHREAD_DEBUG_LOG +#include +int _pthread_debuglog; +uint64_t _pthread_debugstart; +#endif + +// +// Global static variables +// +static bool __workq_newapi; +static uint8_t default_priority; +#if !VARIANT_DYLD +static uint8_t max_priority; +static uint8_t min_priority; +#endif // !VARIANT_DYLD +static int _pthread_count = 1; static int pthread_concurrency; +static uintptr_t _pthread_ptr_munge_token; + +static void (*exitf)(int) = __exit; +#if !VARIANT_DYLD +static void *(*_pthread_malloc)(size_t) = NULL; +static void (*_pthread_free)(void *) = NULL; +#endif // !VARIANT_DYLD // work queue support data -static void (*__libdispatch_workerfunction)(pthread_priority_t) = NULL; -static void (*__libdispatch_keventfunction)(void **events, int *nevents) = NULL; -static void (*__libdispatch_workloopfunction)(uint64_t *workloop_id, void **events, int *nevents) = NULL; +PTHREAD_NORETURN +static void +__pthread_invalid_keventfunction(void **events, int *nevents) +{ + PTHREAD_CLIENT_CRASH(0, "Invalid kqworkq setup"); +} + +PTHREAD_NORETURN +static void +__pthread_invalid_workloopfunction(uint64_t *workloop_id, void **events, int *nevents) +{ + PTHREAD_CLIENT_CRASH(0, "Invalid kqwl setup"); +} +static pthread_workqueue_function2_t __libdispatch_workerfunction; +static pthread_workqueue_function_kevent_t __libdispatch_keventfunction = &__pthread_invalid_keventfunction; +static pthread_workqueue_function_workloop_t __libdispatch_workloopfunction = &__pthread_invalid_workloopfunction; static int __libdispatch_offset; +static int __pthread_supported_features; // supported feature set -// supported feature set -int __pthread_supported_features; -static bool __workq_newapi; +#if defined(__i386__) || defined(__x86_64__) +static mach_vm_address_t __pthread_stack_hint = 0xB0000000; +#else +#error no __pthread_stack_hint for this architecture +#endif // // Function prototypes // // pthread primitives -static int _pthread_allocate(pthread_t *thread, const pthread_attr_t *attrs, void **stack); -static int _pthread_deallocate(pthread_t t); - -static void _pthread_terminate_invoke(pthread_t t); - -static inline void _pthread_struct_init(pthread_t t, - const pthread_attr_t *attrs, - void *stack, - size_t stacksize, - void *freeaddr, - size_t freesize); +static inline void _pthread_struct_init(pthread_t t, const pthread_attr_t *attrs, + void *stack, size_t stacksize, void *freeaddr, size_t freesize); +#if VARIANT_DYLD +static void _pthread_set_self_dyld(void); +#endif // VARIANT_DYLD static inline void _pthread_set_self_internal(pthread_t, bool needs_tsd_base_set); static void _pthread_dealloc_reply_port(pthread_t t); static void _pthread_dealloc_special_reply_port(pthread_t t); -static inline void __pthread_add_thread(pthread_t t, const pthread_attr_t *attr, bool parent, bool from_mach_thread); -static inline int __pthread_remove_thread(pthread_t t, bool child, bool *should_exit); +static inline void __pthread_started_thread(pthread_t t); static void 
_pthread_exit(pthread_t self, void *value_ptr) __dead2; -static inline void _pthread_introspection_thread_create(pthread_t t, bool destroy); +static inline void _pthread_introspection_thread_create(pthread_t t); static inline void _pthread_introspection_thread_start(pthread_t t); -static inline void _pthread_introspection_thread_terminate(pthread_t t, void *freeaddr, size_t freesize, bool destroy); +static inline void _pthread_introspection_thread_terminate(pthread_t t); static inline void _pthread_introspection_thread_destroy(pthread_t t); extern void _pthread_set_self(pthread_t); extern void start_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unused, int reuse); // trampoline into _pthread_wqthread extern void thread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags); // trampoline into _pthread_start -/* Compatibility: previous pthread API used WORKQUEUE_OVERCOMMIT to request overcommit threads from - * the kernel. This definition is kept here, in userspace only, to perform the compatibility shimm - * from old API requests to the new kext conventions. - */ -#define WORKQUEUE_OVERCOMMIT 0x10000 - /* * Flags filed passed to bsdthread_create and back in pthread_start -31 <---------------------------------> 0 -_________________________________________ -| flags(8) | policy(8) | importance(16) | ------------------------------------------ -*/ - -#define PTHREAD_START_CUSTOM 0x01000000 + * 31 <---------------------------------> 0 + * _________________________________________ + * | flags(8) | policy(8) | importance(16) | + * ----------------------------------------- + */ +#define PTHREAD_START_CUSTOM 0x01000000 // #define PTHREAD_START_SETSCHED 0x02000000 -#define PTHREAD_START_DETACHED 0x04000000 +// was PTHREAD_START_DETACHED 0x04000000 #define PTHREAD_START_QOSCLASS 0x08000000 #define PTHREAD_START_TSD_BASE_SET 0x10000000 +#define PTHREAD_START_SUSPENDED 0x20000000 #define PTHREAD_START_QOSCLASS_MASK 0x00ffffff #define PTHREAD_START_POLICY_BITSHIFT 16 #define PTHREAD_START_POLICY_MASK 0xff #define PTHREAD_START_IMPORTANCE_MASK 0xffff -static int pthread_setschedparam_internal(pthread_t, mach_port_t, int, const struct sched_param *); +#if (!defined(__OPEN_SOURCE__) && TARGET_OS_OSX) || OS_VARIANT_RESOLVED // 40703288 +static int pthread_setschedparam_internal(pthread_t, mach_port_t, int, + const struct sched_param *); +#endif + extern pthread_t __bsdthread_create(void *(*func)(void *), void * func_arg, void * stack, pthread_t thread, unsigned int flags); extern int __bsdthread_register(void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), void (*)(pthread_t, mach_port_t, void *, void *, int), int,void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), int32_t *,__uint64_t); extern int __bsdthread_terminate(void * freeaddr, size_t freesize, mach_port_t kport, mach_port_t joinsem); extern __uint64_t __thread_selfid( void ); -extern int __workq_open(void); -extern int __workq_kernreturn(int, void *, int, int); - -#if defined(__i386__) || defined(__x86_64__) -static const mach_vm_address_t PTHREAD_STACK_HINT = 0xB0000000; +#if __LP64__ +_Static_assert(offsetof(struct _pthread, tsd) == 224, "TSD LP64 offset"); #else -#error no PTHREAD_STACK_HINT for this architecture +_Static_assert(offsetof(struct _pthread, tsd) == 176, "TSD ILP32 offset"); #endif - -// Check that offsets of _PTHREAD_STRUCT_DIRECT_*_OFFSET values hasn't changed _Static_assert(offsetof(struct 
_pthread, tsd) + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET == offsetof(struct _pthread, thread_id), "_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET is correct"); -// Allocate a thread structure, stack and guard page. -// -// The thread structure may optionally be placed in the same allocation as the -// stack, residing above the top of the stack. This cannot be done if a -// custom stack address is provided. -// -// Similarly the guard page cannot be allocated if a custom stack address is -// provided. -// -// The allocated thread structure is initialized with values that indicate how -// it should be freed. - -static int -_pthread_allocate(pthread_t *thread, const pthread_attr_t *attrs, void **stack) -{ - int res; - kern_return_t kr; - pthread_t t = NULL; - mach_vm_address_t allocaddr = PTHREAD_STACK_HINT; - size_t allocsize = 0; - size_t guardsize = 0; - size_t stacksize = 0; - - PTHREAD_ASSERT(attrs->stacksize >= PTHREAD_STACK_MIN); - - *thread = NULL; - *stack = NULL; - - // Allocate a pthread structure if necessary - - if (attrs->stackaddr != NULL) { - PTHREAD_ASSERT(((uintptr_t)attrs->stackaddr % vm_page_size) == 0); - *stack = attrs->stackaddr; - allocsize = PTHREAD_SIZE; - } else { - guardsize = attrs->guardsize; - stacksize = attrs->stacksize; - allocsize = stacksize + guardsize + PTHREAD_SIZE; - } - - kr = mach_vm_map(mach_task_self(), - &allocaddr, - allocsize, - vm_page_size - 1, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, - MEMORY_OBJECT_NULL, - 0, - FALSE, - VM_PROT_DEFAULT, - VM_PROT_ALL, - VM_INHERIT_DEFAULT); - - if (kr != KERN_SUCCESS) { - kr = mach_vm_allocate(mach_task_self(), - &allocaddr, - allocsize, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE); - } - - if (kr == KERN_SUCCESS) { - // The stack grows down. - // Set the guard page at the lowest address of the - // newly allocated stack. Return the highest address - // of the stack. - if (guardsize) { - (void)mach_vm_protect(mach_task_self(), allocaddr, guardsize, FALSE, VM_PROT_NONE); - } - - // Thread structure resides at the top of the stack. - t = (void *)(allocaddr + stacksize + guardsize); - if (stacksize) { - // Returns the top of the stack. - *stack = t; - } - } - - if (t != NULL) { - _pthread_struct_init(t, attrs, - *stack, attrs->stacksize, - allocaddr, allocsize); - *thread = t; - res = 0; - } else { - res = EAGAIN; - } - return res; -} - -static int -_pthread_deallocate(pthread_t t) -{ - // Don't free the main thread. - if (t != &_thread) { - kern_return_t ret; - ret = mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize); - PTHREAD_ASSERT(ret == KERN_SUCCESS); - } - return 0; -} - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wreturn-stack-address" - -PTHREAD_NOINLINE -static void* -_pthread_current_stack_address(void) -{ - int a; - return &a; -} - -#pragma clang diagnostic pop - -// Terminates the thread if called from the currently running thread. -PTHREAD_NORETURN PTHREAD_NOINLINE PTHREAD_NOT_TAIL_CALLED -static void -_pthread_terminate(pthread_t t) -{ - PTHREAD_ASSERT(t == pthread_self()); - - uintptr_t freeaddr = (uintptr_t)t->freeaddr; - size_t freesize = t->freesize; - - // the size of just the stack - size_t freesize_stack = t->freesize; - - // We usually pass our structure+stack to bsdthread_terminate to free, but - // if we get told to keep the pthread_t structure around then we need to - // adjust the free size and addr in the pthread_t to just refer to the - // structure and not the stack. 
If we do end up deallocating the - // structure, this is useless work since no one can read the result, but we - // can't do it after the call to pthread_remove_thread because it isn't - // safe to dereference t after that. - if ((void*)t > t->freeaddr && (void*)t < t->freeaddr + t->freesize){ - // Check to ensure the pthread structure itself is part of the - // allocation described by freeaddr/freesize, in which case we split and - // only deallocate the area below the pthread structure. In the event of a - // custom stack, the freeaddr/size will be the pthread structure itself, in - // which case we shouldn't free anything (the final else case). - freesize_stack = trunc_page((uintptr_t)t - (uintptr_t)freeaddr); - - // describe just the remainder for deallocation when the pthread_t goes away - t->freeaddr += freesize_stack; - t->freesize -= freesize_stack; - } else if (t == &_thread){ - freeaddr = t->stackaddr - pthread_get_stacksize_np(t); - uintptr_t stackborder = trunc_page((uintptr_t)_pthread_current_stack_address()); - freesize_stack = stackborder - freeaddr; - } else { - freesize_stack = 0; - } - - mach_port_t kport = _pthread_kernel_thread(t); - semaphore_t joinsem = t->joiner_notify; - - _pthread_dealloc_special_reply_port(t); - _pthread_dealloc_reply_port(t); +#pragma mark pthread attrs - // After the call to __pthread_remove_thread, it is not safe to - // dereference the pthread_t structure. - - bool destroy, should_exit; - destroy = (__pthread_remove_thread(t, true, &should_exit) != EBUSY); - - if (!destroy || t == &_thread) { - // Use the adjusted freesize of just the stack that we computed above. - freesize = freesize_stack; - } - - // Check if there is nothing to free because the thread has a custom - // stack allocation and is joinable. - if (freesize == 0) { - freeaddr = 0; - } - _pthread_introspection_thread_terminate(t, freeaddr, freesize, destroy); - if (should_exit) { - exitf(0); - } - - __bsdthread_terminate((void *)freeaddr, freesize, kport, joinsem); - PTHREAD_ABORT("thread %p didn't terminate", t); -} - -PTHREAD_NORETURN -static void -_pthread_terminate_invoke(pthread_t t) -{ - _pthread_terminate(t); -} +_Static_assert(sizeof(struct _pthread_attr_t) == sizeof(__darwin_pthread_attr_t), + "internal pthread_attr_t == external pthread_attr_t"); int pthread_attr_destroy(pthread_attr_t *attr) @@ -467,12 +316,24 @@ pthread_attr_getinheritsched(const pthread_attr_t *attr, int *inheritsched) return ret; } +static PTHREAD_ALWAYS_INLINE void +_pthread_attr_get_schedparam(const pthread_attr_t *attr, + struct sched_param *param) +{ + if (attr->schedset) { + *param = attr->param; + } else { + param->sched_priority = default_priority; + param->quantum = 10; /* quantum isn't public yet */ + } +} + int pthread_attr_getschedparam(const pthread_attr_t *attr, struct sched_param *param) { int ret = EINVAL; if (attr->sig == _PTHREAD_ATTR_SIG) { - *param = attr->param; + _pthread_attr_get_schedparam(attr, param); ret = 0; } return ret; @@ -489,24 +350,10 @@ pthread_attr_getschedpolicy(const pthread_attr_t *attr, int *policy) return ret; } -// Default stack size is 512KB; independent of the main thread's stack size. 
-static const size_t DEFAULT_STACK_SIZE = 512 * 1024; - int pthread_attr_init(pthread_attr_t *attr) { - attr->stacksize = DEFAULT_STACK_SIZE; - attr->stackaddr = NULL; - attr->sig = _PTHREAD_ATTR_SIG; - attr->param.sched_priority = default_priority; - attr->param.quantum = 10; /* quantum isn't public yet */ - attr->detached = PTHREAD_CREATE_JOINABLE; - attr->inherit = _PTHREAD_DEFAULT_INHERITSCHED; - attr->policy = _PTHREAD_DEFAULT_POLICY; - attr->fastpath = 1; - attr->schedset = 0; - attr->guardsize = vm_page_size; - attr->qosclass = _pthread_priority_make_newest(QOS_CLASS_DEFAULT, 0, 0); + *attr = _pthread_attr_default; return 0; } @@ -515,8 +362,8 @@ pthread_attr_setdetachstate(pthread_attr_t *attr, int detachstate) { int ret = EINVAL; if (attr->sig == _PTHREAD_ATTR_SIG && - (detachstate == PTHREAD_CREATE_JOINABLE || - detachstate == PTHREAD_CREATE_DETACHED)) { + (detachstate == PTHREAD_CREATE_JOINABLE || + detachstate == PTHREAD_CREATE_DETACHED)) { attr->detached = detachstate; ret = 0; } @@ -528,8 +375,8 @@ pthread_attr_setinheritsched(pthread_attr_t *attr, int inheritsched) { int ret = EINVAL; if (attr->sig == _PTHREAD_ATTR_SIG && - (inheritsched == PTHREAD_INHERIT_SCHED || - inheritsched == PTHREAD_EXPLICIT_SCHED)) { + (inheritsched == PTHREAD_INHERIT_SCHED || + inheritsched == PTHREAD_EXPLICIT_SCHED)) { attr->inherit = inheritsched; ret = 0; } @@ -553,12 +400,14 @@ int pthread_attr_setschedpolicy(pthread_attr_t *attr, int policy) { int ret = EINVAL; - if (attr->sig == _PTHREAD_ATTR_SIG && - (policy == SCHED_OTHER || - policy == SCHED_RR || - policy == SCHED_FIFO)) { + if (attr->sig == _PTHREAD_ATTR_SIG && (policy == SCHED_OTHER || + policy == SCHED_RR || policy == SCHED_FIFO)) { + if (!_PTHREAD_POLICY_IS_FIXEDPRI(policy)) { + /* non-fixedpri policy should remove cpupercent */ + attr->cpupercentset = 0; + } attr->policy = policy; - attr->schedset = 1; + attr->policyset = 1; ret = 0; } return ret; @@ -606,21 +455,27 @@ pthread_attr_setstackaddr(pthread_attr_t *attr, void *stackaddr) { int ret = EINVAL; if (attr->sig == _PTHREAD_ATTR_SIG && - ((uintptr_t)stackaddr % vm_page_size) == 0) { + ((uintptr_t)stackaddr % vm_page_size) == 0) { attr->stackaddr = stackaddr; - attr->fastpath = 0; + attr->defaultguardpage = false; attr->guardsize = 0; ret = 0; } return ret; } +static inline size_t +_pthread_attr_stacksize(const pthread_attr_t *attr) +{ + return attr->stacksize ? attr->stacksize : DEFAULT_STACK_SIZE; +} + int pthread_attr_getstacksize(const pthread_attr_t *attr, size_t *stacksize) { int ret = EINVAL; if (attr->sig == _PTHREAD_ATTR_SIG) { - *stacksize = attr->stacksize; + *stacksize = _pthread_attr_stacksize(attr); ret = 0; } return ret; @@ -631,70 +486,356 @@ pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize) { int ret = EINVAL; if (attr->sig == _PTHREAD_ATTR_SIG && - (stacksize % vm_page_size) == 0 && - stacksize >= PTHREAD_STACK_MIN) { + (stacksize % vm_page_size) == 0 && + stacksize >= PTHREAD_STACK_MIN) { + attr->stacksize = stacksize; + ret = 0; + } + return ret; +} + +int +pthread_attr_getstack(const pthread_attr_t *attr, void **stackaddr, size_t * stacksize) +{ + int ret = EINVAL; + if (attr->sig == _PTHREAD_ATTR_SIG) { + *stackaddr = (void *)((uintptr_t)attr->stackaddr - attr->stacksize); + *stacksize = _pthread_attr_stacksize(attr); + ret = 0; + } + return ret; +} + +// Per SUSv3, the stackaddr is the base address, the lowest addressable byte +// address. This is not the same as in pthread_attr_setstackaddr. 
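/*
 * Editor's note (illustrative sketch, not part of the patch): the comment
 * above describes the addressing difference between pthread_attr_setstack()
 * and pthread_attr_setstackaddr().  A hedged example follows; the helper
 * name and sizes are made up, and <pthread.h>, <stdlib.h>, <errno.h> plus
 * the Mach vm_page_size global are assumed to be available.
 */
static int
_example_attr_with_custom_stack(pthread_attr_t *attr)
{
	size_t size = 16 * (size_t)vm_page_size;	// page multiple, >= PTHREAD_STACK_MIN
	void *lowest = valloc(size);			// page-aligned region for the stack
	if (lowest == NULL) {
		return ENOMEM;
	}
	pthread_attr_init(attr);
	// SUSv3 style: pass the lowest addressable byte and the size
	return pthread_attr_setstack(attr, lowest, size);
	// The older interfaces would instead take the high end that the stack
	// grows down from: pthread_attr_setstackaddr(attr, (char *)lowest + size)
	// paired with pthread_attr_setstacksize(attr, size).
}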
+int +pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize) +{ + int ret = EINVAL; + if (attr->sig == _PTHREAD_ATTR_SIG && + ((uintptr_t)stackaddr % vm_page_size) == 0 && + (stacksize % vm_page_size) == 0 && + stacksize >= PTHREAD_STACK_MIN) { + attr->stackaddr = (void *)((uintptr_t)stackaddr + stacksize); attr->stacksize = stacksize; ret = 0; } - return ret; + return ret; +} + +int +pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize) +{ + int ret = EINVAL; + if (attr->sig == _PTHREAD_ATTR_SIG && (guardsize % vm_page_size) == 0) { + /* Guardsize of 0 is valid, means no guard */ + attr->defaultguardpage = false; + attr->guardsize = guardsize; + ret = 0; + } + return ret; +} + +static inline size_t +_pthread_attr_guardsize(const pthread_attr_t *attr) +{ + return attr->defaultguardpage ? vm_page_size : attr->guardsize; +} + +int +pthread_attr_getguardsize(const pthread_attr_t *attr, size_t *guardsize) +{ + int ret = EINVAL; + if (attr->sig == _PTHREAD_ATTR_SIG) { + *guardsize = _pthread_attr_guardsize(attr); + ret = 0; + } + return ret; +} + +int +pthread_attr_setcpupercent_np(pthread_attr_t *attr, int percent, + unsigned long refillms) +{ + int ret = EINVAL; + if (attr->sig == _PTHREAD_ATTR_SIG && percent < UINT8_MAX && + refillms < _PTHREAD_ATTR_REFILLMS_MAX && attr->policyset && + _PTHREAD_POLICY_IS_FIXEDPRI(attr->policy)) { + attr->cpupercent = percent; + attr->refillms = (uint32_t)(refillms & 0x00ffffff); + attr->cpupercentset = 1; + ret = 0; + } + return ret; +} + +#pragma mark pthread lifetime + +// Allocate a thread structure, stack and guard page. +// +// The thread structure may optionally be placed in the same allocation as the +// stack, residing above the top of the stack. This cannot be done if a +// custom stack address is provided. +// +// Similarly the guard page cannot be allocated if a custom stack address is +// provided. +// +// The allocated thread structure is initialized with values that indicate how +// it should be freed. + +static pthread_t +_pthread_allocate(const pthread_attr_t *attrs, void **stack) +{ + mach_vm_address_t allocaddr = __pthread_stack_hint; + size_t allocsize, guardsize, stacksize; + kern_return_t kr; + pthread_t t; + + PTHREAD_ASSERT(attrs->stacksize == 0 || + attrs->stacksize >= PTHREAD_STACK_MIN); + + // Allocate a pthread structure if necessary + + if (attrs->stackaddr != NULL) { + PTHREAD_ASSERT(((uintptr_t)attrs->stackaddr % vm_page_size) == 0); + allocsize = PTHREAD_SIZE; + guardsize = 0; + // if the attrs struct specifies a custom + // stack address but not a custom size, using ->stacksize here instead + // of _pthread_attr_stacksize stores stacksize as zero, indicating + // that the stack size is unknown. + stacksize = attrs->stacksize; + } else { + guardsize = _pthread_attr_guardsize(attrs); + stacksize = _pthread_attr_stacksize(attrs) + PTHREAD_T_OFFSET; + allocsize = stacksize + guardsize + PTHREAD_SIZE; + allocsize = mach_vm_round_page(allocsize); + } + + kr = mach_vm_map(mach_task_self(), &allocaddr, allocsize, vm_page_size - 1, + VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, MEMORY_OBJECT_NULL, + 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); + + if (kr != KERN_SUCCESS) { + kr = mach_vm_allocate(mach_task_self(), &allocaddr, allocsize, + VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE); + } + if (kr != KERN_SUCCESS) { + *stack = NULL; + return NULL; + } + + // The stack grows down. + // Set the guard page at the lowest address of the + // newly allocated stack. 
Return the highest address + // of the stack. + if (guardsize) { + (void)mach_vm_protect(mach_task_self(), allocaddr, guardsize, + FALSE, VM_PROT_NONE); + } + + // Thread structure resides at the top of the stack (when using a + // custom stack, allocsize == PTHREAD_SIZE, so places the pthread_t + // at allocaddr). + t = (pthread_t)(allocaddr + allocsize - PTHREAD_SIZE); + if (attrs->stackaddr) { + *stack = attrs->stackaddr; + } else { + *stack = t; + } + + _pthread_struct_init(t, attrs, *stack, stacksize, allocaddr, allocsize); + return t; +} + +PTHREAD_NOINLINE +void +_pthread_deallocate(pthread_t t, bool from_mach_thread) +{ + kern_return_t ret; + + // Don't free the main thread. + if (t != main_thread()) { + if (!from_mach_thread) { // see __pthread_add_thread + _pthread_introspection_thread_destroy(t); + } + ret = mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize); + PTHREAD_ASSERT(ret == KERN_SUCCESS); + } } -int -pthread_attr_getstack(const pthread_attr_t *attr, void **stackaddr, size_t * stacksize) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreturn-stack-address" + +PTHREAD_NOINLINE +static void* +_pthread_current_stack_address(void) { - int ret = EINVAL; - if (attr->sig == _PTHREAD_ATTR_SIG) { - *stackaddr = (void *)((uintptr_t)attr->stackaddr - attr->stacksize); - *stacksize = attr->stacksize; - ret = 0; - } - return ret; + int a; + return &a; } -// Per SUSv3, the stackaddr is the base address, the lowest addressable byte -// address. This is not the same as in pthread_attr_setstackaddr. -int -pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize) +#pragma clang diagnostic pop + +void +_pthread_joiner_wake(pthread_t thread) { - int ret = EINVAL; - if (attr->sig == _PTHREAD_ATTR_SIG && - ((uintptr_t)stackaddr % vm_page_size) == 0 && - (stacksize % vm_page_size) == 0 && - stacksize >= PTHREAD_STACK_MIN) { - attr->stackaddr = (void *)((uintptr_t)stackaddr + stacksize); - attr->stacksize = stacksize; - attr->fastpath = 0; - ret = 0; + uint32_t *exit_gate = &thread->tl_exit_gate; + + for (;;) { + int ret = __ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, exit_gate, 0); + if (ret == 0 || ret == -ENOENT) { + return; + } + if (ret != -EINTR) { + PTHREAD_INTERNAL_CRASH(-ret, "pthread_join() wake failure"); + } } - return ret; } -int -pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize) +// Terminates the thread if called from the currently running thread. +PTHREAD_NORETURN PTHREAD_NOINLINE PTHREAD_NOT_TAIL_CALLED +static void +_pthread_terminate(pthread_t t, void *exit_value) { - int ret = EINVAL; - if (attr->sig == _PTHREAD_ATTR_SIG) { - /* Guardsize of 0 is valid, ot means no guard */ - if ((guardsize % vm_page_size) == 0) { - attr->guardsize = guardsize; - attr->fastpath = 0; - ret = 0; + PTHREAD_ASSERT(t == pthread_self()); + + _pthread_introspection_thread_terminate(t); + + uintptr_t freeaddr = (uintptr_t)t->freeaddr; + size_t freesize = t->freesize; + bool should_exit; + + // the size of just the stack + size_t freesize_stack = t->freesize; + + // We usually pass our structure+stack to bsdthread_terminate to free, but + // if we get told to keep the pthread_t structure around then we need to + // adjust the free size and addr in the pthread_t to just refer to the + // structure and not the stack. If we do end up deallocating the + // structure, this is useless work since no one can read the result, but we + // can't do it after the call to pthread_remove_thread because it isn't + // safe to dereference t after that. 
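	/*
	 * Editor's note (illustrative, not part of the patch): for a thread whose
	 * pthread_t sits at the top of its own allocation, the region is
	 * [freeaddr, freeaddr + freesize) with the struct in the top PTHREAD_SIZE
	 * bytes.  trunc_page((uintptr_t)t - freeaddr) is then the guard page plus
	 * the stack below the struct: that part can be handed to
	 * __bsdthread_terminate now, while t->freeaddr/t->freesize are advanced to
	 * cover only the struct so the joiner (or a later _pthread_deallocate())
	 * can free it.
	 */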
+ if ((void*)t > t->freeaddr && (void*)t < t->freeaddr + t->freesize){ + // Check to ensure the pthread structure itself is part of the + // allocation described by freeaddr/freesize, in which case we split and + // only deallocate the area below the pthread structure. In the event of a + // custom stack, the freeaddr/size will be the pthread structure itself, in + // which case we shouldn't free anything (the final else case). + freesize_stack = trunc_page((uintptr_t)t - (uintptr_t)freeaddr); + + // describe just the remainder for deallocation when the pthread_t goes away + t->freeaddr += freesize_stack; + t->freesize -= freesize_stack; + } else if (t == main_thread()) { + freeaddr = t->stackaddr - pthread_get_stacksize_np(t); + uintptr_t stackborder = trunc_page((uintptr_t)_pthread_current_stack_address()); + freesize_stack = stackborder - freeaddr; + } else { + freesize_stack = 0; + } + + mach_port_t kport = _pthread_kernel_thread(t); + bool keep_thread_struct = false, needs_wake = false; + semaphore_t custom_stack_sema = MACH_PORT_NULL; + + _pthread_dealloc_special_reply_port(t); + _pthread_dealloc_reply_port(t); + + _PTHREAD_LOCK(_pthread_list_lock); + + // This piece of code interacts with pthread_join. It will always: + // - set tl_exit_gate to MACH_PORT_DEAD (thread exited) + // - set tl_exit_value to the value passed to pthread_exit() + // - decrement _pthread_count, so that we can exit the process when all + // threads exited even if not all of them were joined. + t->tl_exit_gate = MACH_PORT_DEAD; + t->tl_exit_value = exit_value; + should_exit = (--_pthread_count <= 0); + + // If we see a joiner, we prepost that the join has to succeed, + // and the joiner is committed to finish (even if it was canceled) + if (t->tl_join_ctx) { + custom_stack_sema = _pthread_joiner_prepost_wake(t); // unsets tl_joinable + needs_wake = true; + } + + // Joinable threads that have no joiner yet are kept on the thread list + // so that pthread_join() can later discover the thread when it is joined, + // and will have to do the pthread_t cleanup. + if (t->tl_joinable) { + t->tl_joiner_cleans_up = keep_thread_struct = true; + } else { + TAILQ_REMOVE(&__pthread_head, t, tl_plist); + } + + _PTHREAD_UNLOCK(_pthread_list_lock); + + if (needs_wake) { + // When we found a waiter, we want to drop the very contended list lock + // before we do the syscall in _pthread_joiner_wake(). Then, we decide + // who gets to cleanup the pthread_t between the joiner and the exiting + // thread: + // - the joiner tries to set tl_join_ctx to NULL + // - the exiting thread tries to set tl_joiner_cleans_up to true + // Whoever does it first commits the other guy to cleanup the pthread_t + _pthread_joiner_wake(t); + _PTHREAD_LOCK(_pthread_list_lock); + if (t->tl_join_ctx) { + t->tl_joiner_cleans_up = true; + keep_thread_struct = true; } + _PTHREAD_UNLOCK(_pthread_list_lock); } - return ret; -} -int -pthread_attr_getguardsize(const pthread_attr_t *attr, size_t *guardsize) -{ - int ret = EINVAL; - if (attr->sig == _PTHREAD_ATTR_SIG) { - *guardsize = attr->guardsize; - ret = 0; + // + // /!\ dereferencing `t` past this point is not safe /!\ + // + + if (keep_thread_struct || t == main_thread()) { + // Use the adjusted freesize of just the stack that we computed above. + freesize = freesize_stack; + } else { + _pthread_introspection_thread_destroy(t); } - return ret; + + // Check if there is nothing to free because the thread has a custom + // stack allocation and is joinable. 
+ if (freesize == 0) { + freeaddr = 0; + } + if (should_exit) { + exitf(0); + } + __bsdthread_terminate((void *)freeaddr, freesize, kport, custom_stack_sema); + PTHREAD_INTERNAL_CRASH(t, "thread didn't terminate"); +} + +PTHREAD_NORETURN +static void +_pthread_terminate_invoke(pthread_t t, void *exit_value) +{ +#if PTHREAD_T_OFFSET + void *p = NULL; + // During pthread termination there is a race + // between pthread_join and pthread_terminate; if the joiner is responsible + // for cleaning up the pthread_t struct, then it may destroy some part of the + // stack with it on 16k OSes. So that this doesn't cause _pthread_terminate() + // to crash because its stack has been removed from under its feet, just make + // sure termination happens in a part of the stack that is not on the same + // page as the pthread_t. + if (trunc_page((uintptr_t)__builtin_frame_address(0)) == + trunc_page((uintptr_t)t)) { + p = alloca(PTHREAD_T_OFFSET); + } + // And this __asm__ volatile is needed to stop the compiler from optimising + // away the alloca() completely. + __asm__ volatile ("" : : "r"(p) ); +#endif + _pthread_terminate(t, exit_value); } +#pragma mark pthread start / body /* * Create and start execution of a new thread. @@ -704,51 +845,22 @@ static void _pthread_body(pthread_t self, bool needs_tsd_base_set) { _pthread_set_self_internal(self, needs_tsd_base_set); - __pthread_add_thread(self, NULL, false, false); - void *result = (self->fun)(self->arg); - - _pthread_exit(self, result); + __pthread_started_thread(self); + _pthread_exit(self, (self->fun)(self->arg)); } PTHREAD_NORETURN void -_pthread_start(pthread_t self, - mach_port_t kport, - void *(*fun)(void *), - void *arg, - size_t stacksize, - unsigned int pflags) -{ - if ((pflags & PTHREAD_START_CUSTOM) == 0) { - void *stackaddr = self; - _pthread_struct_init(self, &_pthread_attr_default, - stackaddr, stacksize, - PTHREAD_ALLOCADDR(stackaddr, stacksize), PTHREAD_ALLOCSIZE(stackaddr, stacksize)); - - if (pflags & PTHREAD_START_SETSCHED) { - self->policy = ((pflags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK); - self->param.sched_priority = (pflags & PTHREAD_START_IMPORTANCE_MASK); - } - - if ((pflags & PTHREAD_START_DETACHED) == PTHREAD_START_DETACHED) { - self->detached &= ~PTHREAD_CREATE_JOINABLE; - self->detached |= PTHREAD_CREATE_DETACHED; - } - } - - if ((pflags & PTHREAD_START_QOSCLASS) != 0) { - /* The QoS class is cached in the TSD of the pthread, so to reflect the - * class that the kernel brought us up at, the TSD must be primed from the - * flags parameter. - */ - self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (pflags & PTHREAD_START_QOSCLASS_MASK); - } else { - /* Give the thread a default QoS tier, of zero. 
*/ - self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); - } - +_pthread_start(pthread_t self, mach_port_t kport, + __unused void *(*fun)(void *), __unused void *arg, + __unused size_t stacksize, unsigned int pflags) +{ bool thread_tsd_bsd_set = (bool)(pflags & PTHREAD_START_TSD_BASE_SET); + if (os_unlikely(pflags & PTHREAD_START_SUSPENDED)) { + PTHREAD_INTERNAL_CRASH(0, + "kernel without PTHREAD_START_SUSPENDED support"); + } #if DEBUG PTHREAD_ASSERT(MACH_PORT_VALID(kport)); PTHREAD_ASSERT(_pthread_kernel_thread(self) == kport); @@ -756,20 +868,13 @@ _pthread_start(pthread_t self, // will mark the thread initialized _pthread_markcancel_if_canceled(self, kport); - self->fun = fun; - self->arg = arg; - _pthread_body(self, !thread_tsd_bsd_set); } PTHREAD_ALWAYS_INLINE static inline void -_pthread_struct_init(pthread_t t, - const pthread_attr_t *attrs, - void *stackaddr, - size_t stacksize, - void *freeaddr, - size_t freesize) +_pthread_struct_init(pthread_t t, const pthread_attr_t *attrs, + void *stackaddr, size_t stacksize, void *freeaddr, size_t freesize) { #if DEBUG PTHREAD_ASSERT(t->sig != _PTHREAD_SIG); @@ -777,23 +882,34 @@ _pthread_struct_init(pthread_t t, t->sig = _PTHREAD_SIG; t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = t; - t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); + t->tsd[_PTHREAD_TSD_SLOT_ERRNO] = &t->err_no; + if (attrs->schedset == 0) { + t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = attrs->qosclass; + } else { + t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = + _pthread_unspecified_priority(); + } + t->tsd[_PTHREAD_TSD_SLOT_PTR_MUNGE] = _pthread_ptr_munge_token; + t->tl_has_custom_stack = (attrs->stackaddr != NULL); + _PTHREAD_LOCK_INIT(t->lock); t->stackaddr = stackaddr; - t->stacksize = stacksize; + t->stackbottom = stackaddr - stacksize; t->freeaddr = freeaddr; t->freesize = freesize; - t->guardsize = attrs->guardsize; - t->detached = attrs->detached; + t->guardsize = _pthread_attr_guardsize(attrs); + t->tl_joinable = (attrs->detached == PTHREAD_CREATE_JOINABLE); t->inherit = attrs->inherit; - t->policy = attrs->policy; + t->tl_policy = attrs->policy; t->schedset = attrs->schedset; - t->param = attrs->param; + _pthread_attr_get_schedparam(attrs, &t->tl_param); t->cancel_state = PTHREAD_CANCEL_ENABLE | PTHREAD_CANCEL_DEFERRED; } +#pragma mark pthread public interface + /* Need to deprecate this in future */ int _pthread_is_threaded(void) @@ -818,7 +934,7 @@ mach_port_t pthread_mach_thread_np(pthread_t t) { mach_port_t kport = MACH_PORT_NULL; - (void)_pthread_is_valid(t, 0, &kport); + (void)_pthread_is_valid(t, &kport); return kport; } @@ -831,7 +947,7 @@ pthread_from_mach_thread_np(mach_port_t kernel_thread) /* No need to wait as mach port is already known */ _PTHREAD_LOCK(_pthread_list_lock); - TAILQ_FOREACH(p, &__pthread_head, plist) { + TAILQ_FOREACH(p, &__pthread_head, tl_plist) { if (_pthread_kernel_thread(p) == kernel_thread) { break; } @@ -847,6 +963,7 @@ size_t pthread_get_stacksize_np(pthread_t t) { size_t size = 0; + size_t stacksize = t->stackaddr - t->stackbottom; if (t == NULL) { return ESRCH; // XXX bug? @@ -863,7 +980,7 @@ pthread_get_stacksize_np(pthread_t t) // // Of course, on arm rlim_cur == rlim_max and there's only the one guard // page. So, we can skip all this there. 
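/*
 * A standalone sketch of the main-thread clamp performed just below
 * (illustrative names only, not the library code): report the declared stack
 * size unless the soft RLIMIT_STACK is smaller than the reserved allocation,
 * in which case the rlimit is what is actually usable.
 */
#include <sys/resource.h>
#include <stddef.h>

static size_t
sketch_main_stack_size(size_t declared_size, size_t freesize)
{
	struct rlimit lim;
	if (getrlimit(RLIMIT_STACK, &lim) != 0) {
		return declared_size;
	}
	size_t rlimit_stack = (size_t)lim.rlim_cur;
	if (rlimit_stack == 0 || rlimit_stack > freesize) {
		return declared_size;
	}
	return rlimit_stack;
}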
- if (t == &_thread && t->stacksize + vm_page_size != t->freesize) { + if (t == main_thread() && stacksize + vm_page_size != t->freesize) { // We want to call getrlimit() just once, as it's relatively expensive static size_t rlimit_stack; @@ -877,55 +994,46 @@ pthread_get_stacksize_np(pthread_t t) } if (rlimit_stack == 0 || rlimit_stack > t->freesize) { - return t->stacksize; + return stacksize; } else { return rlimit_stack; } } #endif /* !defined(__arm__) && !defined(__arm64__) */ - if (t == pthread_self() || t == &_thread) { - return t->stacksize; + if (t == pthread_self() || t == main_thread()) { + size = stacksize; + goto out; } - _PTHREAD_LOCK(_pthread_list_lock); - - if (_pthread_is_valid_locked(t)) { - size = t->stacksize; + if (_pthread_validate_thread_and_list_lock(t)) { + size = stacksize; + _PTHREAD_UNLOCK(_pthread_list_lock); } else { size = ESRCH; // XXX bug? } - _PTHREAD_UNLOCK(_pthread_list_lock); - - return size; +out: + // binary compatibility issues force us to return + // DEFAULT_STACK_SIZE here when we do not know the size of the stack + return size ? size : DEFAULT_STACK_SIZE; } PTHREAD_NOEXPORT_VARIANT void * pthread_get_stackaddr_np(pthread_t t) { - void *addr = NULL; - - if (t == NULL) { - return (void *)(uintptr_t)ESRCH; // XXX bug? - } - // since the main thread will not get de-allocated from underneath us - if (t == pthread_self() || t == &_thread) { + if (t == pthread_self() || t == main_thread()) { return t->stackaddr; } - _PTHREAD_LOCK(_pthread_list_lock); - - if (_pthread_is_valid_locked(t)) { - addr = t->stackaddr; - } else { - addr = (void *)(uintptr_t)ESRCH; // XXX bug? + if (!_pthread_validate_thread_and_list_lock(t)) { + return (void *)(uintptr_t)ESRCH; // XXX bug? } + void *addr = t->stackaddr; _PTHREAD_UNLOCK(_pthread_list_lock); - return addr; } @@ -979,32 +1087,30 @@ _pthread_dealloc_special_reply_port(pthread_t t) { mach_port_t special_reply_port = _pthread_special_reply_port(t); if (special_reply_port != MACH_PORT_NULL) { - mach_port_mod_refs(mach_task_self(), special_reply_port, - MACH_PORT_RIGHT_RECEIVE, -1); + thread_destruct_special_reply_port(special_reply_port, + THREAD_SPECIAL_REPLY_PORT_ALL); } } pthread_t pthread_main_thread_np(void) { - return &_thread; + return main_thread(); } /* returns non-zero if the current thread is the main thread */ int pthread_main_np(void) { - pthread_t self = pthread_self(); - - return ((self->detached & _PTHREAD_CREATE_PARENT) == _PTHREAD_CREATE_PARENT); + return pthread_self() == main_thread(); } -/* if we are passed in a pthread_t that is NULL, then we return - the current thread's thread_id. So folks don't have to call - pthread_self, in addition to us doing it, if they just want - their thread_id. -*/ +/* + * if we are passed in a pthread_t that is NULL, then we return the current + * thread's thread_id. So folks don't have to call pthread_self, in addition to + * us doing it, if they just want their thread_id. 
+ */ PTHREAD_NOEXPORT_VARIANT int pthread_threadid_np(pthread_t thread, uint64_t *thread_id) @@ -1018,11 +1124,10 @@ pthread_threadid_np(pthread_t thread, uint64_t *thread_id) if (thread == NULL || thread == self) { *thread_id = self->thread_id; + } else if (!_pthread_validate_thread_and_list_lock(thread)) { + res = ESRCH; } else { - _PTHREAD_LOCK(_pthread_list_lock); - if (!_pthread_is_valid_locked(thread)) { - res = ESRCH; - } else if (thread->thread_id == 0) { + if (thread->thread_id == 0) { res = EINVAL; } else { *thread_id = thread->thread_id; @@ -1036,20 +1141,18 @@ PTHREAD_NOEXPORT_VARIANT int pthread_getname_np(pthread_t thread, char *threadname, size_t len) { - int res = 0; + if (thread == pthread_self()) { + strlcpy(threadname, thread->pthread_name, len); + return 0; + } - if (thread == NULL) { + if (!_pthread_validate_thread_and_list_lock(thread)) { return ESRCH; } - _PTHREAD_LOCK(_pthread_list_lock); - if (_pthread_is_valid_locked(thread)) { - strlcpy(threadname, thread->pthread_name, len); - } else { - res = ESRCH; - } + strlcpy(threadname, thread->pthread_name, len); _PTHREAD_UNLOCK(_pthread_list_lock); - return res; + return 0; } @@ -1079,219 +1182,122 @@ pthread_setname_np(const char *name) PTHREAD_ALWAYS_INLINE static inline void -__pthread_add_thread(pthread_t t, const pthread_attr_t *attrs, - bool parent, bool from_mach_thread) +__pthread_add_thread(pthread_t t, bool from_mach_thread) { - bool should_deallocate = false; - bool should_add = true; - - mach_port_t kport = _pthread_kernel_thread(t); - if (os_slowpath(!MACH_PORT_VALID(kport))) { - PTHREAD_CLIENT_CRASH(kport, - "Unable to allocate thread port, possible port leak"); - } - if (from_mach_thread) { _PTHREAD_LOCK_FROM_MACH_THREAD(_pthread_list_lock); } else { _PTHREAD_LOCK(_pthread_list_lock); } - // The parent and child threads race to add the thread to the list. - // When called by the parent: - // - set parentcheck to true - // - back off if childrun is true - // When called by the child: - // - set childrun to true - // - back off if parentcheck is true - if (parent) { - t->parentcheck = 1; - if (t->childrun) { - // child got here first, don't add. - should_add = false; - } + TAILQ_INSERT_TAIL(&__pthread_head, t, tl_plist); + _pthread_count++; - // If the child exits before we check in then it has to keep - // the thread structure memory alive so our dereferences above - // are valid. If it's a detached thread, then no joiner will - // deallocate the thread structure itself. So we do it here. - if (t->childexit) { - should_add = false; - should_deallocate = ((t->detached & PTHREAD_CREATE_DETACHED) == PTHREAD_CREATE_DETACHED); - } + if (from_mach_thread) { + _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock); } else { - t->childrun = 1; - if (t->parentcheck) { - // Parent got here first, don't add. - should_add = false; - } - if (t->wqthread) { - // Work queue threads have no parent. Simulate. - t->parentcheck = 1; - } + _PTHREAD_UNLOCK(_pthread_list_lock); } - if (should_add) { - TAILQ_INSERT_TAIL(&__pthread_head, t, plist); - _pthread_count++; - - /* - * Set some initial values which we know in the pthread structure in - * case folks try to get the values before the thread can set them. - */ - if (parent && attrs && attrs->schedset == 0) { - t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = attrs->qosclass; - } + if (!from_mach_thread) { + // PR-26275485: Mach threads will likely crash trying to run + // introspection code. 
Since the fall out from the introspection + // code not seeing the injected thread is likely less than crashing + // in the introspection code, just don't make the call. + _pthread_introspection_thread_create(t); } +} - if (from_mach_thread){ - _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock); +PTHREAD_ALWAYS_INLINE +static inline void +__pthread_undo_add_thread(pthread_t t, bool from_mach_thread) +{ + if (from_mach_thread) { + _PTHREAD_LOCK_FROM_MACH_THREAD(_pthread_list_lock); } else { - _PTHREAD_UNLOCK(_pthread_list_lock); + _PTHREAD_LOCK(_pthread_list_lock); } - if (parent) { - if (!from_mach_thread) { - // PR-26275485: Mach threads will likely crash trying to run - // introspection code. Since the fall out from the introspection - // code not seeing the injected thread is likely less than crashing - // in the introspection code, just don't make the call. - _pthread_introspection_thread_create(t, should_deallocate); - } - if (should_deallocate) { - _pthread_deallocate(t); - } + TAILQ_REMOVE(&__pthread_head, t, tl_plist); + _pthread_count--; + + if (from_mach_thread) { + _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock); } else { - _pthread_introspection_thread_start(t); + _PTHREAD_UNLOCK(_pthread_list_lock); } } -// must always inline this function to avoid epilogues -// Returns EBUSY if the thread structure should be kept alive (is joinable). -// Returns ESRCH if the thread structure is no longer valid (was detached). PTHREAD_ALWAYS_INLINE -static inline int -__pthread_remove_thread(pthread_t t, bool child, bool *should_exit) +static inline void +__pthread_started_thread(pthread_t t) { - int ret = 0; - - bool should_remove = true; - - _PTHREAD_LOCK(_pthread_list_lock); - - // When a thread removes itself: - // - Set the childexit flag indicating that the thread has exited. - // - Return false if parentcheck is zero (must keep structure) - // - If the thread is joinable, keep it on the list so that - // the join operation succeeds. Still decrement the running - // thread count so that we exit if no threads are running. - // - Update the running thread count. - // When another thread removes a joinable thread: - // - CAREFUL not to dereference the thread before verifying that the - // reference is still valid using _pthread_is_valid_locked(). - // - Remove the thread from the list. - - if (child) { - t->childexit = 1; - if (t->parentcheck == 0) { - ret = EBUSY; - } - if ((t->detached & PTHREAD_CREATE_JOINABLE) != 0) { - ret = EBUSY; - should_remove = false; - } - *should_exit = (--_pthread_count <= 0); - } else if (!_pthread_is_valid_locked(t)) { - ret = ESRCH; - should_remove = false; - } else if ((t->detached & PTHREAD_CREATE_JOINABLE) == 0) { - // If we found a thread but it's not joinable, bail. - ret = ESRCH; - should_remove = false; - } else if (t->parentcheck == 0) { - // If we're not the child thread *and* the parent has not finished - // creating the thread yet, then we are another thread that's joining - // and we cannot deallocate the pthread. 
- ret = EBUSY; - } - if (should_remove) { - TAILQ_REMOVE(&__pthread_head, t, plist); + mach_port_t kport = _pthread_kernel_thread(t); + if (os_slowpath(!MACH_PORT_VALID(kport))) { + PTHREAD_CLIENT_CRASH(kport, + "Unable to allocate thread port, possible port leak"); } - - _PTHREAD_UNLOCK(_pthread_list_lock); - - return ret; + _pthread_introspection_thread_start(t); } +#define _PTHREAD_CREATE_NONE 0x0 +#define _PTHREAD_CREATE_FROM_MACH_THREAD 0x1 +#define _PTHREAD_CREATE_SUSPENDED 0x2 + static int -_pthread_create(pthread_t *thread, - const pthread_attr_t *attr, - void *(*start_routine)(void *), - void *arg, - bool from_mach_thread) +_pthread_create(pthread_t *thread, const pthread_attr_t *attrs, + void *(*start_routine)(void *), void *arg, unsigned int create_flags) { pthread_t t = NULL; - unsigned int flags = 0; + void *stack = NULL; + bool from_mach_thread = (create_flags & _PTHREAD_CREATE_FROM_MACH_THREAD); - pthread_attr_t *attrs = (pthread_attr_t *)attr; if (attrs == NULL) { attrs = &_pthread_attr_default; } else if (attrs->sig != _PTHREAD_ATTR_SIG) { return EINVAL; } - if (attrs->detached == PTHREAD_CREATE_DETACHED) { - flags |= PTHREAD_START_DETACHED; - } - + unsigned int flags = PTHREAD_START_CUSTOM; if (attrs->schedset != 0) { + struct sched_param p; + _pthread_attr_get_schedparam(attrs, &p); flags |= PTHREAD_START_SETSCHED; flags |= ((attrs->policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT); - flags |= (attrs->param.sched_priority & PTHREAD_START_IMPORTANCE_MASK); + flags |= (p.sched_priority & PTHREAD_START_IMPORTANCE_MASK); } else if (attrs->qosclass != 0) { flags |= PTHREAD_START_QOSCLASS; flags |= (attrs->qosclass & PTHREAD_START_QOSCLASS_MASK); } + if (create_flags & _PTHREAD_CREATE_SUSPENDED) { + flags |= PTHREAD_START_SUSPENDED; + } __is_threaded = 1; - void *stack; - - if (attrs->fastpath) { - // kernel will allocate thread and stack, pass stacksize. - stack = (void *)attrs->stacksize; - } else { - // allocate the thread and its stack - flags |= PTHREAD_START_CUSTOM; - - int res; - res = _pthread_allocate(&t, attrs, &stack); - if (res) { - return res; - } - - t->arg = arg; - t->fun = start_routine; + t =_pthread_allocate(attrs, &stack); + if (t == NULL) { + return EAGAIN; } - pthread_t t2; - t2 = __bsdthread_create(start_routine, arg, stack, t, flags); - if (t2 == (pthread_t)-1) { + t->arg = arg; + t->fun = start_routine; + __pthread_add_thread(t, from_mach_thread); + + if (__bsdthread_create(start_routine, arg, stack, t, flags) == + (pthread_t)-1) { if (errno == EMFILE) { PTHREAD_CLIENT_CRASH(0, "Unable to allocate thread port, possible port leak"); } - if (flags & PTHREAD_START_CUSTOM) { - // free the thread and stack if we allocated it - _pthread_deallocate(t); - } + __pthread_undo_add_thread(t, from_mach_thread); + _pthread_deallocate(t, from_mach_thread); return EAGAIN; } - if (t == NULL) { - t = t2; - } - __pthread_add_thread(t, attrs, true, from_mach_thread); + if (create_flags & _PTHREAD_CREATE_SUSPENDED) { + _pthread_markcancel_if_canceled(t, _pthread_kernel_thread(t)); + } // n.b. 
if a thread is created detached and exits, t will be invalid *thread = t; @@ -1299,78 +1305,87 @@ _pthread_create(pthread_t *thread, } int -pthread_create(pthread_t *thread, - const pthread_attr_t *attr, - void *(*start_routine)(void *), - void *arg) +pthread_create(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) { - return _pthread_create(thread, attr, start_routine, arg, false); + unsigned int flags = _PTHREAD_CREATE_NONE; + return _pthread_create(thread, attr, start_routine, arg, flags); } int -pthread_create_from_mach_thread(pthread_t *thread, - const pthread_attr_t *attr, - void *(*start_routine)(void *), - void *arg) +pthread_create_from_mach_thread(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) { - return _pthread_create(thread, attr, start_routine, arg, true); + unsigned int flags = _PTHREAD_CREATE_FROM_MACH_THREAD; + return _pthread_create(thread, attr, start_routine, arg, flags); } +#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288 +/* Functions defined in machine-dependent files. */ +PTHREAD_NOEXPORT void _pthread_setup_suspended(pthread_t th, void (*f)(pthread_t), void *sp); + PTHREAD_NORETURN static void _pthread_suspended_body(pthread_t self) { _pthread_set_self(self); - __pthread_add_thread(self, NULL, false, false); + __pthread_started_thread(self); _pthread_exit(self, (self->fun)(self->arg)); } -int -pthread_create_suspended_np(pthread_t *thread, - const pthread_attr_t *attr, - void *(*start_routine)(void *), - void *arg) +static int +_pthread_create_suspended_np(pthread_t *thread, const pthread_attr_t *attrs, + void *(*start_routine)(void *), void *arg) { - int res; + pthread_t t; void *stack; mach_port_t kernel_thread = MACH_PORT_NULL; - const pthread_attr_t *attrs = attr; if (attrs == NULL) { attrs = &_pthread_attr_default; } else if (attrs->sig != _PTHREAD_ATTR_SIG) { return EINVAL; } - pthread_t t; - res = _pthread_allocate(&t, attrs, &stack); - if (res) { - return res; + t = _pthread_allocate(attrs, &stack); + if (t == NULL) { + return EAGAIN; } - *thread = t; - - kern_return_t kr; - kr = thread_create(mach_task_self(), &kernel_thread); - if (kr != KERN_SUCCESS) { - //PTHREAD_ABORT("thread_create() failed: %d", kern_res); - return EINVAL; /* Need better error here? */ + if (thread_create(mach_task_self(), &kernel_thread) != KERN_SUCCESS) { + _pthread_deallocate(t, false); + return EAGAIN; } _pthread_set_kernel_thread(t, kernel_thread); - (void)pthread_setschedparam_internal(t, kernel_thread, t->policy, &t->param); + (void)pthread_setschedparam_internal(t, kernel_thread, + t->tl_policy, &t->tl_param); __is_threaded = 1; t->arg = arg; t->fun = start_routine; - t->cancel_state |= _PTHREAD_CANCEL_INITIALIZED; - __pthread_add_thread(t, NULL, true, false); + __pthread_add_thread(t, false); // Set up a suspended thread. 
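/*
 * A caller-side usage sketch for pthread_create_suspended_np() (hedged;
 * helper names here are only illustrative): the new thread stays suspended
 * until its Mach port is resumed, so the creator can finish publishing any
 * shared state first.
 */
#include <pthread.h>
#include <mach/mach.h>

static void *
worker(void *arg)
{
	return arg;
}

static int
start_paused_worker(pthread_t *out)
{
	int rc = pthread_create_suspended_np(out, NULL, worker, NULL);
	if (rc != 0) {
		return rc;
	}
	/* ... initialize anything the worker will read ... */
	(void)thread_resume(pthread_mach_thread_np(*out));
	return 0;
}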
- _pthread_setup(t, _pthread_suspended_body, stack, 1, 0); - return res; + _pthread_setup_suspended(t, _pthread_suspended_body, stack); + *thread = t; + return 0; +} +#endif // !defined(__OPEN_SOURCE__) && TARGET_OS_OSX + +int +pthread_create_suspended_np(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) +{ +#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288 + if (_os_xbs_chrooted) { + return _pthread_create_suspended_np(thread, attr, start_routine, arg); + } +#endif + unsigned int flags = _PTHREAD_CREATE_SUSPENDED; + return _pthread_create(thread, attr, start_routine, arg, flags); } @@ -1379,33 +1394,31 @@ int pthread_detach(pthread_t thread) { int res = 0; - bool join = false; - semaphore_t sema = SEMAPHORE_NULL; + bool join = false, wake = false; - if (!_pthread_is_valid(thread, PTHREAD_IS_VALID_LOCK_THREAD, NULL)) { - return ESRCH; // Not a valid thread to detach. + if (!_pthread_validate_thread_and_list_lock(thread)) { + return ESRCH; } - if ((thread->detached & PTHREAD_CREATE_DETACHED) || - !(thread->detached & PTHREAD_CREATE_JOINABLE)) { + if (!thread->tl_joinable) { res = EINVAL; - } else if (thread->detached & _PTHREAD_EXITED) { + } else if (thread->tl_exit_gate == MACH_PORT_DEAD) { // Join the thread if it's already exited. join = true; } else { - thread->detached &= ~PTHREAD_CREATE_JOINABLE; - thread->detached |= PTHREAD_CREATE_DETACHED; - sema = thread->joiner_notify; + thread->tl_joinable = false; // _pthread_joiner_prepost_wake uses this + if (thread->tl_join_ctx) { + (void)_pthread_joiner_prepost_wake(thread); + wake = true; + } } - - _PTHREAD_UNLOCK(thread->lock); + _PTHREAD_UNLOCK(_pthread_list_lock); if (join) { pthread_join(thread, NULL); - } else if (sema) { - semaphore_signal(sema); + } else if (wake) { + _pthread_joiner_wake(thread); } - return res; } @@ -1418,7 +1431,7 @@ pthread_kill(pthread_t th, int sig) } mach_port_t kport = MACH_PORT_NULL; - if (!_pthread_is_valid(th, 0, &kport)) { + if (!_pthread_is_valid(th, &kport)) { return ESRCH; // Not a valid thread. } @@ -1452,18 +1465,19 @@ __pthread_workqueue_setkill(int enable) /* For compatibility... */ pthread_t -_pthread_self(void) { +_pthread_self(void) +{ return pthread_self(); } /* * Terminate a thread. 
*/ -int __disable_threadsignal(int); +extern int __disable_threadsignal(int); PTHREAD_NORETURN static void -_pthread_exit(pthread_t self, void *value_ptr) +_pthread_exit(pthread_t self, void *exit_value) { struct __darwin_pthread_handler_rec *handler; @@ -1471,7 +1485,7 @@ _pthread_exit(pthread_t self, void *value_ptr) __disable_threadsignal(1); // Set cancel state to disable and type to deferred - _pthread_setcancelstate_exit(self, value_ptr, __unix_conforming); + _pthread_setcancelstate_exit(self, exit_value); while ((handler = self->__cleanup_stack) != 0) { (handler->__routine)(handler->__arg); @@ -1479,71 +1493,44 @@ _pthread_exit(pthread_t self, void *value_ptr) } _pthread_tsd_cleanup(self); - _PTHREAD_LOCK(self->lock); - self->detached |= _PTHREAD_EXITED; - self->exit_value = value_ptr; - - if ((self->detached & PTHREAD_CREATE_JOINABLE) && - self->joiner_notify == SEMAPHORE_NULL) { - self->joiner_notify = (semaphore_t)os_get_cached_semaphore(); - } - _PTHREAD_UNLOCK(self->lock); - // Clear per-thread semaphore cache os_put_cached_semaphore(SEMAPHORE_NULL); - _pthread_terminate_invoke(self); + _pthread_terminate_invoke(self, exit_value); } void -pthread_exit(void *value_ptr) +pthread_exit(void *exit_value) { pthread_t self = pthread_self(); - if (self->wqthread == 0) { - _pthread_exit(self, value_ptr); - } else { - PTHREAD_ABORT("pthread_exit() may only be called against threads created via pthread_create()"); + if (os_unlikely(self->wqthread)) { + PTHREAD_CLIENT_CRASH(0, "pthread_exit() called from a thread " + "not created by pthread_create()"); } + _pthread_exit(self, exit_value); } PTHREAD_NOEXPORT_VARIANT int -pthread_getschedparam(pthread_t thread, - int *policy, - struct sched_param *param) +pthread_getschedparam(pthread_t thread, int *policy, struct sched_param *param) { - int ret = 0; - - if (thread == NULL) { + if (!_pthread_validate_thread_and_list_lock(thread)) { return ESRCH; } - _PTHREAD_LOCK(_pthread_list_lock); - - if (_pthread_is_valid_locked(thread)) { - if (policy) { - *policy = thread->policy; - } - if (param) { - *param = thread->param; - } - } else { - ret = ESRCH; - } - + if (policy) *policy = thread->tl_policy; + if (param) *param = thread->tl_param; _PTHREAD_UNLOCK(_pthread_list_lock); - - return ret; + return 0; } + PTHREAD_ALWAYS_INLINE static inline int -pthread_setschedparam_internal(pthread_t thread, - mach_port_t kport, - int policy, - const struct sched_param *param) +pthread_setschedparam_internal(pthread_t thread, mach_port_t kport, int policy, + const struct sched_param *param) { policy_base_data_t bases; policy_base_t base; @@ -1575,41 +1562,37 @@ pthread_setschedparam_internal(pthread_t thread, return (ret != KERN_SUCCESS) ? EINVAL : 0; } - PTHREAD_NOEXPORT_VARIANT int pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param) { mach_port_t kport = MACH_PORT_NULL; - int res; int bypass = 1; // since the main thread will not get de-allocated from underneath us - if (t == pthread_self() || t == &_thread) { + if (t == pthread_self() || t == main_thread()) { kport = _pthread_kernel_thread(t); } else { bypass = 0; - (void)_pthread_is_valid(t, 0, &kport); + if (!_pthread_is_valid(t, &kport)) { + return ESRCH; + } } - res = pthread_setschedparam_internal(t, kport, policy, param); - if (res == 0) { - if (bypass == 0) { - // Ensure the thread is still valid. 
- _PTHREAD_LOCK(_pthread_list_lock); - if (_pthread_is_valid_locked(t)) { - t->policy = policy; - t->param = *param; - } else { - res = ESRCH; - } - _PTHREAD_UNLOCK(_pthread_list_lock); - } else { - t->policy = policy; - t->param = *param; - } + int res = pthread_setschedparam_internal(t, kport, policy, param); + if (res) return res; + + if (bypass) { + _PTHREAD_LOCK(_pthread_list_lock); + } else if (!_pthread_validate_thread_and_list_lock(t)) { + // Ensure the thread is still valid. + return ESRCH; } - return res; + + t->tl_policy = policy; + t->tl_param = *param; + _PTHREAD_UNLOCK(_pthread_list_lock); + return 0; } @@ -1639,25 +1622,49 @@ PTHREAD_NOINLINE void _pthread_set_self(pthread_t p) { - return _pthread_set_self_internal(p, true); +#if VARIANT_DYLD + if (os_likely(!p)) { + return _pthread_set_self_dyld(); + } +#endif // VARIANT_DYLD + _pthread_set_self_internal(p, true); } -PTHREAD_ALWAYS_INLINE -static inline void -_pthread_set_self_internal(pthread_t p, bool needs_tsd_base_set) +#if VARIANT_DYLD +// _pthread_set_self_dyld is noinline+noexport to allow the option for +// static libsyscall to adopt this as the entry point from mach_init if +// desired +PTHREAD_NOINLINE PTHREAD_NOEXPORT +void +_pthread_set_self_dyld(void) { - if (p == NULL) { - p = &_thread; - } + pthread_t p = main_thread(); + p->thread_id = __thread_selfid(); - uint64_t tid = __thread_selfid(); - if (tid == -1ull) { - PTHREAD_ABORT("failed to set thread_id"); + if (os_unlikely(p->thread_id == -1ull)) { + PTHREAD_INTERNAL_CRASH(0, "failed to set thread_id"); } + // pthread self and the errno address are the + // bare minimium TSD setup that dyld needs to actually function. Without + // this, TSD access will fail and crash if it uses bits of Libc prior to + // library initialization. __pthread_init will finish the initialization + // during library init. 
p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = p; p->tsd[_PTHREAD_TSD_SLOT_ERRNO] = &p->err_no; - p->thread_id = tid; + _thread_set_tsd_base(&p->tsd[0]); +} +#endif // VARIANT_DYLD + +PTHREAD_ALWAYS_INLINE +static inline void +_pthread_set_self_internal(pthread_t p, bool needs_tsd_base_set) +{ + p->thread_id = __thread_selfid(); + + if (os_unlikely(p->thread_id == -1ull)) { + PTHREAD_INTERNAL_CRASH(0, "failed to set thread_id"); + } if (needs_tsd_base_set) { _thread_set_tsd_base(&p->tsd[0]); @@ -1719,6 +1726,32 @@ pthread_setconcurrency(int new_level) return 0; } +#if !defined(VARIANT_STATIC) +void * +malloc(size_t sz) +{ + if (_pthread_malloc) { + return _pthread_malloc(sz); + } else { + return NULL; + } +} + +void +free(void *p) +{ + if (_pthread_free) { + _pthread_free(p); + } +} +#endif // VARIANT_STATIC + +/* + * Perform package initialization - called automatically when application starts + */ +struct ProgramVars; /* forward reference */ + +#if !VARIANT_DYLD static unsigned long _pthread_strtoul(const char *p, const char **endptr, int base) { @@ -1777,36 +1810,29 @@ out: return ret; } -#if !defined(VARIANT_STATIC) -void * -malloc(size_t sz) +static void +parse_ptr_munge_params(const char *envp[], const char *apple[]) { - if (_pthread_malloc) { - return _pthread_malloc(sz); - } else { - return NULL; + const char *p, *s; + p = _simple_getenv(apple, "ptr_munge"); + if (p) { + _pthread_ptr_munge_token = _pthread_strtoul(p, &s, 16); + bzero((char *)p, strlen(p)); } -} - -void -free(void *p) -{ - if (_pthread_free) { - _pthread_free(p); +#if !DEBUG + if (_pthread_ptr_munge_token) return; +#endif + p = _simple_getenv(envp, "PTHREAD_PTR_MUNGE_TOKEN"); + if (p) { + uintptr_t t = _pthread_strtoul(p, &s, 16); + if (t) _pthread_ptr_munge_token = t; } } -#endif // VARIANT_STATIC - -/* - * Perform package initialization - called automatically when application starts - */ -struct ProgramVars; /* forward reference */ int __pthread_init(const struct _libpthread_functions *pthread_funcs, - const char *envp[] __unused, - const char *apple[], - const struct ProgramVars *vars __unused) + const char *envp[], const char *apple[], + const struct ProgramVars *vars __unused) { // Save our provided pushed-down functions if (pthread_funcs) { @@ -1829,11 +1855,11 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs, host_t host = mach_host_self(); kr = host_info(host, flavor, (host_info_t)&priority_info, &count); if (kr != KERN_SUCCESS) { - PTHREAD_ABORT("host_info(mach_host_self(), ...) failed: %s", mach_error_string(kr)); + PTHREAD_INTERNAL_CRASH(kr, "host_info() failed"); } else { - default_priority = priority_info.user_priority; - min_priority = priority_info.minimum_priority; - max_priority = priority_info.maximum_priority; + default_priority = (uint8_t)priority_info.user_priority; + min_priority = (uint8_t)priority_info.minimum_priority; + max_priority = (uint8_t)priority_info.maximum_priority; } mach_port_deallocate(mach_task_self(), host); @@ -1863,12 +1889,22 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs, allocsize = 0; } - pthread_t thread = &_thread; - pthread_attr_init(&_pthread_attr_default); + // Initialize random ptr_munge token from the kernel. + parse_ptr_munge_params(envp, apple); + + // libpthread.a in dyld "owns" the main thread structure itself and sets + // up the tsd to point to it. So take the pthread_self() from there + // and make it our main thread point. 
+ pthread_t thread = (pthread_t)_pthread_getspecific_direct( + _PTHREAD_TSD_SLOT_PTHREAD_SELF); + PTHREAD_ASSERT(thread); + _main_thread_ptr = thread; + + PTHREAD_ASSERT(_pthread_attr_default.qosclass == + _pthread_default_priority(0)); _pthread_struct_init(thread, &_pthread_attr_default, - stackaddr, stacksize, - allocaddr, allocsize); - thread->detached = PTHREAD_CREATE_JOINABLE; + stackaddr, stacksize, allocaddr, allocsize); + thread->tl_joinable = true; // Finish initialization with common code that is reinvoked on the // child side of a fork. @@ -1897,66 +1933,35 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs, return 0; } +#endif // !VARIANT_DYLD PTHREAD_NOEXPORT void _pthread_main_thread_init(pthread_t p) { TAILQ_INIT(&__pthread_head); _PTHREAD_LOCK_INIT(_pthread_list_lock); - - // Re-use the main thread's static storage if no thread was provided. - if (p == NULL) { - if (_thread.tsd[0] != 0) { - bzero(&_thread, sizeof(struct _pthread)); - } - p = &_thread; - } - _PTHREAD_LOCK_INIT(p->lock); _pthread_set_kernel_thread(p, mach_thread_self()); _pthread_set_reply_port(p, mach_reply_port()); p->__cleanup_stack = NULL; - p->joiner_notify = SEMAPHORE_NULL; - p->joiner = MACH_PORT_NULL; - p->detached |= _PTHREAD_CREATE_PARENT; + p->tl_join_ctx = NULL; + p->tl_exit_gate = MACH_PORT_NULL; p->tsd[__TSD_SEMAPHORE_CACHE] = (void*)SEMAPHORE_NULL; + p->tsd[__TSD_MACH_SPECIAL_REPLY] = 0; p->cancel_state |= _PTHREAD_CANCEL_INITIALIZED; // Initialize the list of threads with the new main thread. - TAILQ_INSERT_HEAD(&__pthread_head, p, plist); + TAILQ_INSERT_HEAD(&__pthread_head, p, tl_plist); _pthread_count = 1; - _pthread_set_self(p); _pthread_introspection_thread_start(p); } -int -_pthread_join_cleanup(pthread_t thread, void ** value_ptr, int conforming) -{ - int ret = __pthread_remove_thread(thread, false, NULL); - if (ret != 0 && ret != EBUSY) { - // Returns ESRCH if the thread was not created joinable. - return ret; - } - - if (value_ptr) { - *value_ptr = _pthread_get_exit_value(thread, conforming); - } - _pthread_introspection_thread_destroy(thread); - if (ret != EBUSY) { - // __pthread_remove_thread returns EBUSY if the parent has not - // finished creating the thread (and is still expecting the pthread_t - // to be alive). - _pthread_deallocate(thread); - } - return 0; -} - int sched_yield(void) { - swtch_pri(0); - return 0; + swtch_pri(0); + return 0; } // XXX remove @@ -1974,22 +1979,25 @@ pthread_yield_np(void) +// Libsystem knows about this symbol and exports it to libsyscall PTHREAD_NOEXPORT_VARIANT void _pthread_clear_qos_tsd(mach_port_t thread_port) { if (thread_port == MACH_PORT_NULL || (uintptr_t)_pthread_getspecific_direct(_PTHREAD_TSD_SLOT_MACH_THREAD_SELF) == thread_port) { /* Clear the current thread's TSD, that can be done inline. 
*/ - _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0)); + _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, + _pthread_unspecified_priority()); } else { pthread_t p; _PTHREAD_LOCK(_pthread_list_lock); - TAILQ_FOREACH(p, &__pthread_head, plist) { + TAILQ_FOREACH(p, &__pthread_head, tl_plist) { mach_port_t kp = _pthread_kernel_thread(p); if (thread_port == kp) { - p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); + p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = + _pthread_unspecified_priority(); break; } } @@ -1999,7 +2007,35 @@ _pthread_clear_qos_tsd(mach_port_t thread_port) } -/***** pthread workqueue support routines *****/ +#pragma mark pthread/stack_np.h public interface + + +#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__arm64__) +typedef uintptr_t frame_data_addr_t; + +struct frame_data { + frame_data_addr_t frame_addr_next; + frame_data_addr_t ret_addr; +}; +#else +#error ********** Unimplemented architecture +#endif + +uintptr_t +pthread_stack_frame_decode_np(uintptr_t frame_addr, uintptr_t *return_addr) +{ + struct frame_data *frame = (struct frame_data *)frame_addr; + + if (return_addr) { + *return_addr = (uintptr_t)frame->ret_addr; + } + + return (uintptr_t)frame->frame_addr_next; +} + + +#pragma mark pthread workqueue support routines + PTHREAD_NOEXPORT void _pthread_bsdthread_init(struct _pthread_registration_data *data) @@ -2011,19 +2047,18 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data) data->tsd_offset = offsetof(struct _pthread, tsd); data->mach_thread_self_offset = __TSD_MACH_THREAD_SELF * sizeof(void *); - int rv = __bsdthread_register(thread_start, - start_wqthread, (int)PTHREAD_SIZE, - (void*)data, (uintptr_t)sizeof(*data), - data->dispatch_queue_offset); + int rv = __bsdthread_register(thread_start, start_wqthread, (int)PTHREAD_SIZE, + (void*)data, (uintptr_t)sizeof(*data), data->dispatch_queue_offset); if (rv > 0) { - if ((rv & PTHREAD_FEATURE_QOS_DEFAULT) == 0) { - PTHREAD_INTERNAL_CRASH(rv, - "Missing required support for QOS_CLASS_DEFAULT"); - } - if ((rv & PTHREAD_FEATURE_QOS_MAINTENANCE) == 0) { - PTHREAD_INTERNAL_CRASH(rv, - "Missing required support for QOS_CLASS_MAINTENANCE"); + int required_features = + PTHREAD_FEATURE_FINEPRIO | + PTHREAD_FEATURE_BSDTHREADCTL | + PTHREAD_FEATURE_SETSELF | + PTHREAD_FEATURE_QOS_MAINTENANCE | + PTHREAD_FEATURE_QOS_DEFAULT; + if ((rv & required_features) != required_features) { + PTHREAD_INTERNAL_CRASH(rv, "Missing required kernel support"); } __pthread_supported_features = rv; } @@ -2039,9 +2074,13 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data) pthread_priority_t main_qos = (pthread_priority_t)data->main_qos; - if (_pthread_priority_get_qos_newest(main_qos) != QOS_CLASS_UNSPECIFIED) { + if (_pthread_priority_thread_qos(main_qos) != THREAD_QOS_UNSPECIFIED) { _pthread_set_main_qos(main_qos); - _thread.tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = main_qos; + main_thread()->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = main_qos; + } + + if (data->stack_addr_hint) { + __pthread_stack_hint = data->stack_addr_hint; } if (__libdispatch_workerfunction != NULL) { @@ -2050,191 +2089,188 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data) } } -// workqueue entry point from kernel -PTHREAD_NORETURN -void -_pthread_wqthread(pthread_t self, mach_port_t kport, void *stacklowaddr, void *keventlist, int flags, int 
nkevents) +PTHREAD_NOINLINE +static void +_pthread_wqthread_legacy_worker_wrap(pthread_priority_t pp) +{ + /* Old thread priorities are inverted from where we have them in + * the new flexible priority scheme. The highest priority is zero, + * up to 2, with background at 3. + */ + pthread_workqueue_function_t func = (pthread_workqueue_function_t)__libdispatch_workerfunction; + bool overcommit = (pp & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG); + int opts = overcommit ? WORKQ_ADDTHREADS_OPTION_OVERCOMMIT : 0; + + switch (_pthread_priority_thread_qos(pp)) { + case THREAD_QOS_USER_INITIATED: + return (*func)(WORKQ_HIGH_PRIOQUEUE, opts, NULL); + case THREAD_QOS_LEGACY: + /* B&I builders can't pass a QOS_CLASS_DEFAULT thread to dispatch, for fear of the QoS being + * picked up by NSThread (et al) and transported around the system. So change the TSD to + * make this thread look like QOS_CLASS_USER_INITIATED even though it will still run as legacy. + */ + _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, + _pthread_priority_make_from_thread_qos(THREAD_QOS_USER_INITIATED, 0, 0)); + return (*func)(WORKQ_DEFAULT_PRIOQUEUE, opts, NULL); + case THREAD_QOS_UTILITY: + return (*func)(WORKQ_LOW_PRIOQUEUE, opts, NULL); + case THREAD_QOS_BACKGROUND: + return (*func)(WORKQ_BG_PRIOQUEUE, opts, NULL); + } + PTHREAD_INTERNAL_CRASH(pp, "Invalid pthread priority for the legacy interface"); +} + +PTHREAD_ALWAYS_INLINE +static inline pthread_priority_t +_pthread_wqthread_priority(int flags) { - PTHREAD_ASSERT(flags & WQ_FLAG_THREAD_NEWSPI); + pthread_priority_t pp = 0; + thread_qos_t qos; + + if (flags & WQ_FLAG_THREAD_KEVENT) { + pp |= _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG; + } + if (flags & WQ_FLAG_THREAD_EVENT_MANAGER) { + return pp | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; + } - bool thread_reuse = flags & WQ_FLAG_THREAD_REUSE; - bool overcommit = flags & WQ_FLAG_THREAD_OVERCOMMIT; - bool kevent = flags & WQ_FLAG_THREAD_KEVENT; - bool workloop = (flags & WQ_FLAG_THREAD_WORKLOOP) && - __libdispatch_workloopfunction != NULL; - PTHREAD_ASSERT((!kevent) || (__libdispatch_keventfunction != NULL)); - PTHREAD_ASSERT(!workloop || kevent); + if (flags & WQ_FLAG_THREAD_OVERCOMMIT) { + pp |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; + } + if (flags & WQ_FLAG_THREAD_PRIO_QOS) { + qos = (thread_qos_t)(flags & WQ_FLAG_THREAD_PRIO_MASK); + pp = _pthread_priority_make_from_thread_qos(qos, 0, pp); + } else if (flags & WQ_FLAG_THREAD_PRIO_SCHED) { + pp |= _PTHREAD_PRIORITY_SCHED_PRI_MASK; + pp |= (flags & WQ_FLAG_THREAD_PRIO_MASK); + } else { + PTHREAD_INTERNAL_CRASH(flags, "Missing priority"); + } + return pp; +} - pthread_priority_t priority = 0; - unsigned long priority_flags = 0; +PTHREAD_NOINLINE +static void +_pthread_wqthread_setup(pthread_t self, mach_port_t kport, void *stacklowaddr, + int flags) +{ + void *stackaddr = self; + size_t stacksize = (uintptr_t)self - (uintptr_t)stacklowaddr; - if (overcommit) - priority_flags |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; - if (flags & WQ_FLAG_THREAD_EVENT_MANAGER) - priority_flags |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; - if (kevent) - priority_flags |= _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG; + _pthread_struct_init(self, &_pthread_attr_default, stackaddr, stacksize, + PTHREAD_ALLOCADDR(stackaddr, stacksize), + PTHREAD_ALLOCSIZE(stackaddr, stacksize)); - int thread_class = flags & WQ_FLAG_THREAD_PRIOMASK; - priority = _pthread_priority_make_newest(thread_class, 0, priority_flags); + _pthread_set_kernel_thread(self, kport); + self->wqthread = 1; + self->wqkillset = 0; + self->tl_joinable = 
false; + self->cancel_state |= _PTHREAD_CANCEL_INITIALIZED; - if (!thread_reuse) { - // New thread created by kernel, needs initialization. - void *stackaddr = self; - size_t stacksize = (uintptr_t)self - (uintptr_t)stacklowaddr; + // Update the running thread count and set childrun bit. + bool thread_tsd_base_set = (bool)(flags & WQ_FLAG_THREAD_TSD_BASE_SET); + _pthread_set_self_internal(self, !thread_tsd_base_set); + __pthread_add_thread(self, false); + __pthread_started_thread(self); +} - _pthread_struct_init(self, &_pthread_attr_default, - stackaddr, stacksize, - PTHREAD_ALLOCADDR(stackaddr, stacksize), PTHREAD_ALLOCSIZE(stackaddr, stacksize)); +PTHREAD_NORETURN PTHREAD_NOINLINE +static void +_pthread_wqthread_exit(pthread_t self) +{ + pthread_priority_t pp; + thread_qos_t qos; - _pthread_set_kernel_thread(self, kport); - self->wqthread = 1; - self->wqkillset = 0; - self->cancel_state |= _PTHREAD_CANCEL_INITIALIZED; + pp = (pthread_priority_t)self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS]; + qos = _pthread_priority_thread_qos(pp); + if (qos == THREAD_QOS_UNSPECIFIED || qos > WORKQ_THREAD_QOS_CLEANUP) { + // Reset QoS to something low for the cleanup process + pp = _pthread_priority_make_from_thread_qos(WORKQ_THREAD_QOS_CLEANUP, 0, 0); + self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (void *)pp; + } - // Not a joinable thread. - self->detached &= ~PTHREAD_CREATE_JOINABLE; - self->detached |= PTHREAD_CREATE_DETACHED; + _pthread_exit(self, NULL); +} - // Update the running thread count and set childrun bit. - bool thread_tsd_base_set = (bool)(flags & WQ_FLAG_THREAD_TSD_BASE_SET); - _pthread_set_self_internal(self, !thread_tsd_base_set); - _pthread_introspection_thread_create(self, false); - __pthread_add_thread(self, NULL, false, false); +// workqueue entry point from kernel +void +_pthread_wqthread(pthread_t self, mach_port_t kport, void *stacklowaddr, + void *keventlist, int flags, int nkevents) +{ + if ((flags & WQ_FLAG_THREAD_REUSE) == 0) { + _pthread_wqthread_setup(self, kport, stacklowaddr, flags); } - // If we're running with fine-grained priority, we also need to - // set this thread to have the QoS class provided to use by the kernel - if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) { - _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(thread_class, 0, priority_flags)); + pthread_priority_t pp; + if (flags & WQ_FLAG_THREAD_OUTSIDEQOS) { + self->wqoutsideqos = 1; + pp = _pthread_priority_make_from_thread_qos(THREAD_QOS_LEGACY, 0, + _PTHREAD_PRIORITY_FALLBACK_FLAG); + } else { + self->wqoutsideqos = 0; + pp = _pthread_wqthread_priority(flags); } -#if WQ_DEBUG - PTHREAD_ASSERT(self); - PTHREAD_ASSERT(self == pthread_self()); -#endif // WQ_DEBUG + self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (void *)pp; - if (workloop) { + // avoid spills on the stack hard to keep used stack space minimal + if (nkevents == WORKQ_EXIT_THREAD_NKEVENT) { + goto exit; + } else if (flags & WQ_FLAG_THREAD_WORKLOOP) { self->fun = (void *(*)(void*))__libdispatch_workloopfunction; - } else if (kevent){ + self->wq_retop = WQOPS_THREAD_WORKLOOP_RETURN; + self->wq_kqid_ptr = ((kqueue_id_t *)keventlist - 1); + self->arg = keventlist; + self->wq_nevents = nkevents; + } else if (flags & WQ_FLAG_THREAD_KEVENT) { self->fun = (void *(*)(void*))__libdispatch_keventfunction; + self->wq_retop = WQOPS_THREAD_KEVENT_RETURN; + self->wq_kqid_ptr = NULL; + self->arg = keventlist; + self->wq_nevents = nkevents; } else { self->fun = (void 
*(*)(void*))__libdispatch_workerfunction; + self->wq_retop = WQOPS_THREAD_RETURN; + self->wq_kqid_ptr = NULL; + self->arg = (void *)(uintptr_t)pp; + self->wq_nevents = 0; + if (os_likely(__workq_newapi)) { + (*__libdispatch_workerfunction)(pp); + } else { + _pthread_wqthread_legacy_worker_wrap(pp); + } + goto just_return; } - self->arg = (void *)(uintptr_t)thread_class; - - if (kevent && keventlist && nkevents > 0){ - int errors_out; - kevent_errors_retry: - if (workloop) { - kqueue_id_t kevent_id = *(kqueue_id_t*)((char*)keventlist - sizeof(kqueue_id_t)); - kqueue_id_t kevent_id_in = kevent_id; - (__libdispatch_workloopfunction)(&kevent_id, &keventlist, &nkevents); - PTHREAD_ASSERT(kevent_id == kevent_id_in || nkevents == 0); - errors_out = __workq_kernreturn(WQOPS_THREAD_WORKLOOP_RETURN, keventlist, nkevents, 0); + if (nkevents > 0) { +kevent_errors_retry: + if (self->wq_retop == WQOPS_THREAD_WORKLOOP_RETURN) { + ((pthread_workqueue_function_workloop_t)self->fun) + (self->wq_kqid_ptr, &self->arg, &self->wq_nevents); } else { - (__libdispatch_keventfunction)(&keventlist, &nkevents); - errors_out = __workq_kernreturn(WQOPS_THREAD_KEVENT_RETURN, keventlist, nkevents, 0); + ((pthread_workqueue_function_kevent_t)self->fun) + (&self->arg, &self->wq_nevents); } - - if (errors_out > 0){ - nkevents = errors_out; + int rc = __workq_kernreturn(self->wq_retop, self->arg, self->wq_nevents, 0); + if (os_unlikely(rc > 0)) { + self->wq_nevents = rc; goto kevent_errors_retry; - } else if (errors_out < 0){ - PTHREAD_ABORT("kevent return produced an error: %d", errno); - } - goto thexit; - } else if (kevent){ - if (workloop) { - (__libdispatch_workloopfunction)(0, NULL, NULL); - __workq_kernreturn(WQOPS_THREAD_WORKLOOP_RETURN, NULL, 0, -1); - } else { - (__libdispatch_keventfunction)(NULL, NULL); - __workq_kernreturn(WQOPS_THREAD_KEVENT_RETURN, NULL, 0, 0); } - - goto thexit; - } - - if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) { - if (!__workq_newapi) { - /* Old thread priorities are inverted from where we have them in - * the new flexible priority scheme. The highest priority is zero, - * up to 2, with background at 3. - */ - pthread_workqueue_function_t func = (pthread_workqueue_function_t)__libdispatch_workerfunction; - - int opts = overcommit ? WORKQ_ADDTHREADS_OPTION_OVERCOMMIT : 0; - - if ((__pthread_supported_features & PTHREAD_FEATURE_QOS_DEFAULT) == 0) { - /* Dirty hack to support kernels that don't have QOS_CLASS_DEFAULT. */ - switch (thread_class) { - case QOS_CLASS_USER_INTERACTIVE: - thread_class = QOS_CLASS_USER_INITIATED; - break; - case QOS_CLASS_USER_INITIATED: - thread_class = QOS_CLASS_DEFAULT; - break; - default: - break; - } - } - - switch (thread_class) { - /* QOS_CLASS_USER_INTERACTIVE is not currently requested by for old dispatch priority compatibility */ - case QOS_CLASS_USER_INITIATED: - (*func)(WORKQ_HIGH_PRIOQUEUE, opts, NULL); - break; - - case QOS_CLASS_DEFAULT: - /* B&I builders can't pass a QOS_CLASS_DEFAULT thread to dispatch, for fear of the QoS being - * picked up by NSThread (et al) and transported around the system. So change the TSD to - * make this thread look like QOS_CLASS_USER_INITIATED even though it will still run as legacy. 
- */ - _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(QOS_CLASS_USER_INITIATED, 0, 0)); - (*func)(WORKQ_DEFAULT_PRIOQUEUE, opts, NULL); - break; - - case QOS_CLASS_UTILITY: - (*func)(WORKQ_LOW_PRIOQUEUE, opts, NULL); - break; - - case QOS_CLASS_BACKGROUND: - (*func)(WORKQ_BG_PRIOQUEUE, opts, NULL); - break; - - /* Legacy dispatch does not use QOS_CLASS_MAINTENANCE, so no need to handle it here */ - } - - } else { - /* "New" API, where dispatch is expecting to be given the thread priority */ - (*__libdispatch_workerfunction)(priority); + if (os_unlikely(rc < 0)) { + PTHREAD_INTERNAL_CRASH(self->err_no, "kevent (workloop) failed"); } } else { - /* We're the new library running on an old kext, so thread_class is really the workq priority. */ - pthread_workqueue_function_t func = (pthread_workqueue_function_t)__libdispatch_workerfunction; - int options = overcommit ? WORKQ_ADDTHREADS_OPTION_OVERCOMMIT : 0; - (*func)(thread_class, options, NULL); - } - - __workq_kernreturn(WQOPS_THREAD_RETURN, NULL, 0, 0); - -thexit: - { - pthread_priority_t current_priority = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS); - if ((current_priority & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) || - (_pthread_priority_get_qos_newest(current_priority) > WQ_THREAD_CLEANUP_QOS)) { - // Reset QoS to something low for the cleanup process - priority = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0); - _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority); - } +just_return: + __workq_kernreturn(self->wq_retop, NULL, 0, 0); } - _pthread_exit(self, NULL); +exit: + _pthread_wqthread_exit(self); } -/***** pthread workqueue API for libdispatch *****/ + +#pragma mark pthread workqueue API for libdispatch + _Static_assert(WORKQ_KEVENT_EVENT_BUFFER_LEN == WQ_KEVENT_LIST_LEN, "Kernel and userland should agree on the event list size"); @@ -2329,42 +2365,18 @@ pthread_workqueue_addthreads_np(int queue_priority, int options, int numthreads) } pthread_priority_t kp = 0; + int compat_priority = queue_priority & WQ_FLAG_THREAD_PRIO_MASK; + int flags = 0; - if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) { - /* The new kernel API takes the new QoS class + relative priority style of - * priority. This entry point is here for compatibility with old libdispatch - * versions (ie. the simulator). We request the corresponding new bracket - * from the kernel, then on the way out run all dispatch queues that were - * requested. - */ - - int compat_priority = queue_priority & WQ_FLAG_THREAD_PRIOMASK; - int flags = 0; - - /* To make sure the library does not issue more threads to dispatch than - * were requested, the total number of active requests is recorded in - * __workq_requests. - */ - if (options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) { - flags = _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; - } + if (options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) { + flags = _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; + } #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" - kp = _pthread_qos_class_encode_workqueue(compat_priority, flags); + kp = _pthread_qos_class_encode_workqueue(compat_priority, flags); #pragma clang diagnostic pop - } else { - /* Running on the old kernel, queue_priority is what we pass directly to - * the syscall. 
- */ - kp = queue_priority & WQ_FLAG_THREAD_PRIOMASK; - - if (options & WORKQ_ADDTHREADS_OPTION_OVERCOMMIT) { - kp |= WORKQUEUE_OVERCOMMIT; - } - } - res = __workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, numthreads, (int)kp); if (res == -1) { res = errno; @@ -2391,9 +2403,17 @@ _pthread_workqueue_addthreads(int numthreads, pthread_priority_t priority) return EPERM; } - if ((__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) == 0) { - return ENOTSUP; - } +#if TARGET_OS_OSX + // Legacy simulators fail to boot + // + // Older sims set the deprecated _PTHREAD_PRIORITY_ROOTQUEUE_FLAG wrongly, + // which is aliased to _PTHREAD_PRIORITY_SCHED_PRI_FLAG and that XNU + // validates and rejects. + // + // As a workaround, forcefully unset this bit that cannot be set here + // anyway. + priority &= ~_PTHREAD_PRIORITY_SCHED_PRI_FLAG; +#endif res = __workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, numthreads, (int)priority); if (res == -1) { @@ -2412,9 +2432,62 @@ _pthread_workqueue_set_event_manager_priority(pthread_priority_t priority) return res; } -/* - * Introspection SPI for libpthread. - */ +int +_pthread_workloop_create(uint64_t workloop_id, uint64_t options, pthread_attr_t *attr) +{ + struct kqueue_workloop_params params = { + .kqwlp_version = sizeof(struct kqueue_workloop_params), + .kqwlp_id = workloop_id, + .kqwlp_flags = 0, + }; + + if (!attr) { + return EINVAL; + } + + if (attr->schedset) { + params.kqwlp_flags |= KQ_WORKLOOP_CREATE_SCHED_PRI; + params.kqwlp_sched_pri = attr->param.sched_priority; + } + + if (attr->policyset) { + params.kqwlp_flags |= KQ_WORKLOOP_CREATE_SCHED_POL; + params.kqwlp_sched_pol = attr->policy; + } + + if (attr->cpupercentset) { + params.kqwlp_flags |= KQ_WORKLOOP_CREATE_CPU_PERCENT; + params.kqwlp_cpu_percent = attr->cpupercent; + params.kqwlp_cpu_refillms = attr->refillms; + } + + int res = __kqueue_workloop_ctl(KQ_WORKLOOP_CREATE, 0, ¶ms, + sizeof(params)); + if (res == -1) { + res = errno; + } + return res; +} + +int +_pthread_workloop_destroy(uint64_t workloop_id) +{ + struct kqueue_workloop_params params = { + .kqwlp_version = sizeof(struct kqueue_workloop_params), + .kqwlp_id = workloop_id, + }; + + int res = __kqueue_workloop_ctl(KQ_WORKLOOP_DESTROY, 0, ¶ms, + sizeof(params)); + if (res == -1) { + res = errno; + } + return res; +} + + +#pragma mark Introspection SPI for libpthread. 
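/*
 * A client-side sketch of the introspection SPI implemented below (hedged;
 * see pthread/introspection.h for the authoritative declarations): install a
 * hook, remember the previous one, and chain to it so other tools that also
 * registered a hook keep working.
 */
#include <pthread/introspection.h>
#include <stdio.h>

static pthread_introspection_hook_t prev_hook;

static void
my_introspection_hook(unsigned int event, pthread_t thread, void *addr, size_t size)
{
	if (event == PTHREAD_INTROSPECTION_THREAD_CREATE) {
		fprintf(stderr, "thread %p created (%zu bytes)\n", (void *)thread, size);
	}
	if (prev_hook) {
		prev_hook(event, thread, addr, size);
	}
}

static void
install_my_introspection_hook(void)
{
	prev_hook = pthread_introspection_hook_install(my_introspection_hook);
}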
+ static pthread_introspection_hook_t _pthread_introspection_hook; @@ -2428,19 +2501,17 @@ pthread_introspection_hook_install(pthread_introspection_hook_t hook) PTHREAD_NOINLINE static void -_pthread_introspection_hook_callout_thread_create(pthread_t t, bool destroy) +_pthread_introspection_hook_callout_thread_create(pthread_t t) { _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_CREATE, t, t, PTHREAD_SIZE); - if (!destroy) return; - _pthread_introspection_thread_destroy(t); } static inline void -_pthread_introspection_thread_create(pthread_t t, bool destroy) +_pthread_introspection_thread_create(pthread_t t) { if (os_fastpath(!_pthread_introspection_hook)) return; - _pthread_introspection_hook_callout_thread_create(t, destroy); + _pthread_introspection_hook_callout_thread_create(t); } PTHREAD_NOINLINE @@ -2449,8 +2520,9 @@ _pthread_introspection_hook_callout_thread_start(pthread_t t) { size_t freesize; void *freeaddr; - if (t == &_thread) { - freesize = t->stacksize + t->guardsize; + if (t == main_thread()) { + size_t stacksize = t->stackaddr - t->stackbottom; + freesize = stacksize + t->guardsize; freeaddr = t->stackaddr - freesize; } else { freesize = t->freesize - PTHREAD_SIZE; @@ -2469,32 +2541,33 @@ _pthread_introspection_thread_start(pthread_t t) PTHREAD_NOINLINE static void -_pthread_introspection_hook_callout_thread_terminate(pthread_t t, - void *freeaddr, size_t freesize, bool destroy) +_pthread_introspection_hook_callout_thread_terminate(pthread_t t) { - if (destroy && freesize) { - freesize -= PTHREAD_SIZE; + size_t freesize; + void *freeaddr; + if (t == main_thread()) { + size_t stacksize = t->stackaddr - t->stackbottom; + freesize = stacksize + t->guardsize; + freeaddr = t->stackaddr - freesize; + } else { + freesize = t->freesize - PTHREAD_SIZE; + freeaddr = t->freeaddr; } _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_TERMINATE, t, freeaddr, freesize); - if (!destroy) return; - _pthread_introspection_thread_destroy(t); } static inline void -_pthread_introspection_thread_terminate(pthread_t t, void *freeaddr, - size_t freesize, bool destroy) +_pthread_introspection_thread_terminate(pthread_t t) { if (os_fastpath(!_pthread_introspection_hook)) return; - _pthread_introspection_hook_callout_thread_terminate(t, freeaddr, freesize, - destroy); + _pthread_introspection_hook_callout_thread_terminate(t); } PTHREAD_NOINLINE static void _pthread_introspection_hook_callout_thread_destroy(pthread_t t) { - if (t == &_thread) return; _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_DESTROY, t, t, PTHREAD_SIZE); } @@ -2506,3 +2579,37 @@ _pthread_introspection_thread_destroy(pthread_t t) _pthread_introspection_hook_callout_thread_destroy(t); } +#pragma mark libplatform shims + +#include + +// pthread_setup initializes large structures to 0, +// which the compiler turns into a library call to memset. 
+// +// To avoid linking against Libc, provide a simple wrapper +// that calls through to the libplatform primitives + +#undef memset +PTHREAD_NOEXPORT +void * +memset(void *b, int c, size_t len) +{ + return _platform_memset(b, c, len); +} + +#undef bzero +PTHREAD_NOEXPORT +void +bzero(void *s, size_t n) +{ + _platform_bzero(s, n); +} + +#undef memcpy +PTHREAD_NOEXPORT +void * +memcpy(void* a, const void* b, unsigned long s) +{ + return _platform_memmove(a, b, s); +} + diff --git a/src/pthread_asm.s b/src/pthread_asm.s index 8fc11c7..90afe46 100644 --- a/src/pthread_asm.s +++ b/src/pthread_asm.s @@ -21,6 +21,8 @@ * @APPLE_LICENSE_HEADER_END@ */ +#include "offsets.h" + #if defined(__x86_64__) #include @@ -49,6 +51,51 @@ _thread_start: leave ret + .align 2, 0x90 + .globl _thread_chkstk_darwin +_thread_chkstk_darwin: + .globl ____chkstk_darwin +____chkstk_darwin: // %rax == alloca size + pushq %rcx + leaq 0x10(%rsp), %rcx + + // validate that the frame pointer is on our stack (no alt stack) + cmpq %rcx, %gs:_PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET + jb Lprobe + cmpq %rcx, %gs:_PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET + jae Lprobe + + // validate alloca size + subq %rax, %rcx + jb Lcrash + cmpq %rcx, %gs:_PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET + ja Lcrash + + popq %rcx + retq + +Lprobe: + // probe the stack when it's not ours (altstack or some shenanigan) + cmpq $0x1000, %rax + jb Lend + pushq %rax +Lloop: + subq $0x1000, %rcx + testq %rcx, (%rcx) + subq $0x1000, %rax + cmpq $0x1000, %rax + ja Lloop + popq %rax +Lend: + subq %rax, %rcx + testq %rcx, (%rcx) + + popq %rcx + retq + +Lcrash: + ud2 + #endif #elif defined(__i386__) @@ -91,6 +138,56 @@ _thread_start: leave ret + .align 2, 0x90 + .globl _thread_chkstk_darwin +_thread_chkstk_darwin: + .globl ____chkstk_darwin +____chkstk_darwin: // %eax == alloca size + pushl %ecx + pushl %edx + leal 0xc(%esp), %ecx + + // validate that the frame pointer is on our stack (no alt stack) + movl %gs:0x0, %edx // pthread_self() + cmpl %ecx, _PTHREAD_STRUCT_DIRECT_STACKADDR_OFFSET(%edx) + jb Lprobe + movl _PTHREAD_STRUCT_DIRECT_STACKBOTTOM_OFFSET(%edx), %edx + cmpl %ecx, %edx + jae Lprobe + + // validate alloca size + subl %eax, %ecx + jb Lcrash + cmpl %ecx, %edx + ja Lcrash + + popl %edx + popl %ecx + retl + +Lprobe: + // probe the stack when it's not ours (altstack or some shenanigan) + cmpl $0x1000, %eax + jb Lend + pushl %eax +Lloop: + subl $0x1000, %ecx + testl %ecx, (%ecx) + subl $0x1000, %eax + cmpl $0x1000, %eax + ja Lloop + popl %eax +Lend: + subl %eax, %ecx + testl %ecx, (%ecx) + + popl %edx + popl %ecx + retl + +Lcrash: + ud2 + #endif #elif defined(__arm__) diff --git a/src/pthread_cancelable.c b/src/pthread_cancelable.c index 894178c..8bb9c08 100644 --- a/src/pthread_cancelable.c +++ b/src/pthread_cancelable.c @@ -60,10 +60,10 @@ #include #include #include +#include #include #include -extern int __unix_conforming; extern int _pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime, @@ -73,16 +73,27 @@ extern int __sigwait(const sigset_t *set, int *sig); extern int __pthread_sigmask(int, const sigset_t *, sigset_t *); extern int __pthread_markcancel(mach_port_t); extern int __pthread_canceled(int); +extern int __semwait_signal_nocancel(int, int, int, int, __int64_t, __int32_t); -#ifdef VARIANT_CANCELABLE -extern int __semwait_signal(int cond_sem, int mutex_sem, int timeout, int relative, __int64_t tv_sec, __int32_t tv_nsec); -#else -extern int __semwait_signal(int cond_sem, int mutex_sem, int timeout, int 
relative, __int64_t tv_sec, __int32_t tv_nsec) __asm__("___semwait_signal_nocancel"); -#endif PTHREAD_NOEXPORT -int _pthread_join(pthread_t thread, void **value_ptr, int conforming, - int (*_semwait_signal)(int, int, int, int, __int64_t, __int32_t)); +int _pthread_join(pthread_t thread, void **value_ptr, int conforming); + +static inline int +_pthread_conformance(void) +{ +#if __DARWIN_UNIX03 + if (__unix_conforming == 0) + __unix_conforming = 1; +#ifdef VARIANT_CANCELABLE + return PTHREAD_CONFORM_UNIX03_CANCELABLE; +#else /* !VARIANT_CANCELABLE */ + return PTHREAD_CONFORM_UNIX03_NOCANCEL; +#endif +#else /* __DARWIN_UNIX03 */ + return PTHREAD_CONFORM_DARWIN_LEGACY; +#endif /* __DARWIN_UNIX03 */ +} #ifndef VARIANT_CANCELABLE @@ -111,7 +122,7 @@ pthread_cancel(pthread_t thread) __unix_conforming = 1; #endif /* __DARWIN_UNIX03 */ - if (!_pthread_is_valid(thread, 0, NULL)) { + if (!_pthread_is_valid(thread, NULL)) { return(ESRCH); } @@ -135,15 +146,7 @@ pthread_cancel(pthread_t thread) void pthread_testcancel(void) { - pthread_t self = pthread_self(); - -#if __DARWIN_UNIX03 - if (__unix_conforming == 0) - __unix_conforming = 1; - _pthread_testcancel(self, 1); -#else /* __DARWIN_UNIX03 */ - _pthread_testcancel(self, 0); -#endif /* __DARWIN_UNIX03 */ + _pthread_testcancel(_pthread_conformance()); } #ifndef BUILDING_VARIANT /* [ */ @@ -154,23 +157,32 @@ _pthread_exit_if_canceled(int error) { if (((error & 0xff) == EINTR) && __unix_conforming && (__pthread_canceled(0) == 0)) { pthread_t self = pthread_self(); - if (self != NULL) { - self->cancel_error = error; - } + + self->cancel_error = error; + self->canceled = true; pthread_exit(PTHREAD_CANCELED); } } -PTHREAD_NOEXPORT_VARIANT -void -_pthread_testcancel(pthread_t thread, int isconforming) +static inline bool +_pthread_is_canceled(pthread_t thread) { const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING); - int state = os_atomic_load2o(thread, cancel_state, seq_cst); - if ((state & flags) == flags) { - pthread_exit(isconforming ? PTHREAD_CANCELED : 0); + return (state & flags) == flags; +} + +PTHREAD_NOEXPORT_VARIANT +void +_pthread_testcancel(int isconforming) +{ + pthread_t self = pthread_self(); + if (_pthread_is_canceled(self)) { + // 4597450: begin + self->canceled = (isconforming != PTHREAD_CONFORM_DARWIN_LEGACY); + // 4597450: end + pthread_exit(isconforming ? 
PTHREAD_CANCELED : NULL); } } @@ -179,7 +191,6 @@ void _pthread_markcancel_if_canceled(pthread_t thread, mach_port_t kport) { const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING); - int state = os_atomic_or2o(thread, cancel_state, _PTHREAD_CANCEL_INITIALIZED, relaxed); if ((state & flags) == flags && __unix_conforming) { @@ -187,35 +198,14 @@ _pthread_markcancel_if_canceled(pthread_t thread, mach_port_t kport) } } -PTHREAD_NOEXPORT -void * -_pthread_get_exit_value(pthread_t thread, int conforming) -{ - const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING); - void *value = thread->exit_value; - - if (conforming) { - int state = os_atomic_load2o(thread, cancel_state, seq_cst); - if ((state & flags) == flags) { - value = PTHREAD_CANCELED; - } - } - return value; -} - /* When a thread exits set the cancellation state to DISABLE and DEFERRED */ PTHREAD_NOEXPORT void -_pthread_setcancelstate_exit(pthread_t thread, void *value_ptr, int conforming) +_pthread_setcancelstate_exit(pthread_t thread, void *value_ptr) { _pthread_update_cancel_state(thread, _PTHREAD_CANCEL_STATE_MASK | _PTHREAD_CANCEL_TYPE_MASK, PTHREAD_CANCEL_DISABLE | PTHREAD_CANCEL_DEFERRED); - if (value_ptr == PTHREAD_CANCELED) { - _PTHREAD_LOCK(thread->lock); - thread->detached |= _PTHREAD_WASCANCEL; // 4597450 - _PTHREAD_UNLOCK(thread->lock); - } } #endif /* !BUILDING_VARIANT ] */ @@ -227,30 +217,30 @@ PTHREAD_ALWAYS_INLINE static inline int _pthread_setcancelstate_internal(int state, int *oldstateptr, int conforming) { - pthread_t self; + pthread_t self = pthread_self(); switch (state) { - case PTHREAD_CANCEL_ENABLE: - if (conforming) { - __pthread_canceled(1); - } - break; - case PTHREAD_CANCEL_DISABLE: - if (conforming) { - __pthread_canceled(2); - } - break; - default: - return EINVAL; + case PTHREAD_CANCEL_ENABLE: + if (conforming) { + __pthread_canceled(1); + } + break; + case PTHREAD_CANCEL_DISABLE: + if (conforming) { + __pthread_canceled(2); + } + break; + default: + return EINVAL; } - self = pthread_self(); int oldstate = _pthread_update_cancel_state(self, _PTHREAD_CANCEL_STATE_MASK, state); if (oldstateptr) { *oldstateptr = oldstate & _PTHREAD_CANCEL_STATE_MASK; } if (!conforming) { - _pthread_testcancel(self, 0); /* See if we need to 'die' now... */ + /* See if we need to 'die' now... */ + _pthread_testcancel(PTHREAD_CONFORM_DARWIN_LEGACY); } return 0; } @@ -292,7 +282,8 @@ pthread_setcanceltype(int type, int *oldtype) *oldtype = oldstate & _PTHREAD_CANCEL_TYPE_MASK; } #if !__DARWIN_UNIX03 - _pthread_testcancel(self, 0); /* See if we need to 'die' now... */ + /* See if we need to 'die' now... 
*/ + _pthread_testcancel(PTHREAD_CONFORM_DARWIN_LEGACY); #endif /* __DARWIN_UNIX03 */ return (0); } @@ -315,76 +306,196 @@ pthread_sigmask(int how, const sigset_t * set, sigset_t * oset) #ifndef BUILDING_VARIANT /* [ */ -static void -__posix_join_cleanup(void *arg) +typedef struct pthread_join_context_s { + pthread_t waiter; + void **value_ptr; + mach_port_t kport; + semaphore_t custom_stack_sema; + bool detached; +} pthread_join_context_s, *pthread_join_context_t; + +static inline void * +_pthread_get_exit_value(pthread_t thread) { - pthread_t thread = (pthread_t)arg; + if (__unix_conforming && _pthread_is_canceled(thread)) { + return PTHREAD_CANCELED; + } + return thread->tl_exit_value; +} - _PTHREAD_LOCK(thread->lock); - /* leave another thread to join */ - thread->joiner = (struct _pthread *)NULL; - _PTHREAD_UNLOCK(thread->lock); +// called with _pthread_list_lock held +PTHREAD_NOEXPORT +semaphore_t +_pthread_joiner_prepost_wake(pthread_t thread) +{ + pthread_join_context_t ctx = thread->tl_join_ctx; + semaphore_t sema = MACH_PORT_NULL; + + if (thread->tl_joinable) { + sema = ctx->custom_stack_sema; + thread->tl_joinable = false; + } else { + ctx->detached = true; + thread->tl_join_ctx = NULL; + } + if (ctx->value_ptr) *ctx->value_ptr = _pthread_get_exit_value(thread); + return sema; +} + +static inline bool +_pthread_joiner_abort_wait(pthread_t thread, pthread_join_context_t ctx) +{ + bool aborted = false; + + _PTHREAD_LOCK(_pthread_list_lock); + if (!ctx->detached && thread->tl_exit_gate != MACH_PORT_DEAD) { + /* + * _pthread_joiner_prepost_wake() didn't happen + * allow another thread to join + */ +#if DEBUG + PTHREAD_ASSERT(thread->tl_join_ctx == ctx); +#endif + thread->tl_join_ctx = NULL; + thread->tl_exit_gate = MACH_PORT_NULL; + aborted = true; + } + _PTHREAD_UNLOCK(_pthread_list_lock); + return aborted; +} + +static int +_pthread_joiner_wait(pthread_t thread, pthread_join_context_t ctx, int conforming) +{ + uint32_t *exit_gate = &thread->tl_exit_gate; + int ulock_op = UL_UNFAIR_LOCK | ULF_NO_ERRNO; + + if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE) { + ulock_op |= ULF_WAIT_CANCEL_POINT; + } + + for (;;) { + uint32_t cur = os_atomic_load(exit_gate, acquire); + if (cur == MACH_PORT_DEAD) { + break; + } + if (os_unlikely(cur != ctx->kport)) { + PTHREAD_CLIENT_CRASH(cur, "pthread_join() state corruption"); + } + int ret = __ulock_wait(ulock_op, exit_gate, ctx->kport, 0); + switch (-ret) { + case 0: + case EFAULT: + break; + case EINTR: + /* + * POSIX says: + * + * As specified, either the pthread_join() call is canceled, or it + * succeeds, but not both. The difference is obvious to the + * application, since either a cancellation handler is run or + * pthread_join() returns. + * + * When __ulock_wait() returns EINTR, we check if we have been + * canceled, and if we have, we try to abort the wait. + * + * If we can't, it means the other thread finished the join while we + * were being canceled and commited the waiter to return from + * pthread_join(). Returning from the join then takes precedence + * over the cancelation which will be acted upon at the next + * cancelation point. 
+ */ + if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE && + _pthread_is_canceled(ctx->waiter)) { + if (_pthread_joiner_abort_wait(thread, ctx)) { + ctx->waiter->canceled = true; + pthread_exit(PTHREAD_CANCELED); + } + } + break; + } + } + + bool cleanup = false; + + _PTHREAD_LOCK(_pthread_list_lock); + // If pthread_detach() was called, we can't safely dereference the thread, + // else, decide who gets to deallocate the thread (see _pthread_terminate). + if (!ctx->detached) { +#if DEBUG + PTHREAD_ASSERT(thread->tl_join_ctx == ctx); +#endif + thread->tl_join_ctx = NULL; + cleanup = thread->tl_joiner_cleans_up; + } + _PTHREAD_UNLOCK(_pthread_list_lock); + + if (cleanup) { + _pthread_deallocate(thread, false); + } + return 0; } PTHREAD_NOEXPORT PTHREAD_NOINLINE int -_pthread_join(pthread_t thread, void **value_ptr, int conforming, - int (*_semwait_signal)(int, int, int, int, __int64_t, __int32_t)) +_pthread_join(pthread_t thread, void **value_ptr, int conforming) { - int res = 0; pthread_t self = pthread_self(); - kern_return_t kern_res; - semaphore_t joinsem, death = (semaphore_t)os_get_cached_semaphore(); + pthread_join_context_s ctx = { + .waiter = self, + .value_ptr = value_ptr, + .kport = MACH_PORT_NULL, + .custom_stack_sema = MACH_PORT_NULL, + }; + int res = 0; + kern_return_t kr; - if (!_pthread_is_valid(thread, PTHREAD_IS_VALID_LOCK_THREAD, NULL)) { - res = ESRCH; - goto out; + if (!_pthread_validate_thread_and_list_lock(thread)) { + return ESRCH; } - if (thread->sig != _PTHREAD_SIG) { - res = ESRCH; - } else if ((thread->detached & PTHREAD_CREATE_DETACHED) || - !(thread->detached & PTHREAD_CREATE_JOINABLE) || - (thread->joiner != NULL)) { + if (!thread->tl_joinable || (thread->tl_join_ctx != NULL)) { res = EINVAL; - } else if (thread == self || (self != NULL && self->joiner == thread)) { + } else if (thread == self || + (self->tl_join_ctx && self->tl_join_ctx->waiter == thread)) { res = EDEADLK; + } else if (thread->tl_exit_gate == MACH_PORT_DEAD) { + TAILQ_REMOVE(&__pthread_head, thread, tl_plist); +#if DEBUG + PTHREAD_ASSERT(thread->tl_joiner_cleans_up); +#endif + thread->tl_joinable = false; + if (value_ptr) *value_ptr = _pthread_get_exit_value(thread); + } else { + ctx.kport = _pthread_kernel_thread(thread); + thread->tl_exit_gate = ctx.kport; + thread->tl_join_ctx = &ctx; + if (thread->tl_has_custom_stack) { + ctx.custom_stack_sema = (semaphore_t)os_get_cached_semaphore(); + } } - if (res != 0) { - _PTHREAD_UNLOCK(thread->lock); - goto out; - } + _PTHREAD_UNLOCK(_pthread_list_lock); - joinsem = thread->joiner_notify; - if (joinsem == SEMAPHORE_NULL) { - thread->joiner_notify = joinsem = death; - death = MACH_PORT_NULL; + if (res == 0) { + if (ctx.kport == MACH_PORT_NULL) { + _pthread_deallocate(thread, false); + } else { + res = _pthread_joiner_wait(thread, &ctx, conforming); + } } - thread->joiner = self; - _PTHREAD_UNLOCK(thread->lock); - - if (conforming) { - /* Wait for it to signal... */ - pthread_cleanup_push(__posix_join_cleanup, (void *)thread); - do { - res = _semwait_signal(joinsem, 0, 0, 0, 0, 0); - } while ((res < 0) && (errno == EINTR)); - pthread_cleanup_pop(0); - } else { - /* Wait for it to signal... */ - kern_return_t (*_semaphore_wait)(semaphore_t) = - (void*)_semwait_signal; + if (res == 0 && ctx.custom_stack_sema && !ctx.detached) { + // threads with a custom stack need to make sure _pthread_terminate + // returned before the joiner is unblocked, the joiner may quickly + // deallocate the stack with rather dire consequences. 
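As context for the custom-stack handling above: a thread created on caller-provided memory must be completely off that stack before pthread_join() returns, because the joiner typically reclaims the memory immediately afterwards. A minimal sketch of that user-visible contract, not taken from this patch (worker and STACK_SIZE are illustrative names):

#include <pthread.h>
#include <stdlib.h>
#include <sys/mman.h>

#define STACK_SIZE (512 * 1024)   /* illustrative; must be at least PTHREAD_STACK_MIN */

static void *
worker(void *arg)
{
	(void)arg;
	return NULL;
}

int
main(void)
{
	void *stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
			MAP_ANON | MAP_PRIVATE, -1, 0);
	if (stack == MAP_FAILED) abort();

	pthread_attr_t attr;
	pthread_attr_init(&attr);
	pthread_attr_setstack(&attr, stack, STACK_SIZE); /* thread runs on caller-owned memory */

	pthread_t t;
	if (pthread_create(&t, &attr, worker, NULL) != 0) abort();
	pthread_attr_destroy(&attr);

	/* join must not return until the thread is off its stack ... */
	pthread_join(t, NULL);
	/* ... so that reclaiming the stack here is safe */
	munmap(stack, STACK_SIZE);
	return 0;
}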
+ // + // When we reach this point we know the pthread_join has to succeed + // so this can't be a cancelation point. do { - kern_res = _semaphore_wait(joinsem); - } while (kern_res != KERN_SUCCESS); + kr = __semwait_signal_nocancel(ctx.custom_stack_sema, 0, 0, 0, 0, 0); + } while (kr != KERN_SUCCESS); } - - os_put_cached_semaphore((os_semaphore_t)joinsem); - res = _pthread_join_cleanup(thread, value_ptr, conforming); - -out: - if (death) { - os_put_cached_semaphore(death); + if (ctx.custom_stack_sema) { + os_put_cached_semaphore(ctx.custom_stack_sema); } return res; } @@ -398,82 +509,45 @@ out: int pthread_join(pthread_t thread, void **value_ptr) { -#if __DARWIN_UNIX03 - if (__unix_conforming == 0) - __unix_conforming = 1; - -#ifdef VARIANT_CANCELABLE - _pthread_testcancel(pthread_self(), 1); -#endif /* VARIANT_CANCELABLE */ - return _pthread_join(thread, value_ptr, 1, __semwait_signal); -#else - return _pthread_join(thread, value_ptr, 0, (void*)semaphore_wait); -#endif /* __DARWIN_UNIX03 */ - + int conforming = _pthread_conformance(); + if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE) { + _pthread_testcancel(conforming); + } + return _pthread_join(thread, value_ptr, conforming); } int -pthread_cond_wait(pthread_cond_t *cond, - pthread_mutex_t *mutex) +pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { - int conforming; -#if __DARWIN_UNIX03 - - if (__unix_conforming == 0) - __unix_conforming = 1; - -#ifdef VARIANT_CANCELABLE - conforming = 1; -#else /* !VARIANT_CANCELABLE */ - conforming = -1; -#endif /* VARIANT_CANCELABLE */ -#else /* __DARWIN_UNIX03 */ - conforming = 0; -#endif /* __DARWIN_UNIX03 */ - return (_pthread_cond_wait(cond, mutex, (struct timespec *)NULL, 0, conforming)); + return _pthread_cond_wait(cond, mutex, NULL, 0, _pthread_conformance()); } int -pthread_cond_timedwait(pthread_cond_t *cond, - pthread_mutex_t *mutex, - const struct timespec *abstime) +pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, + const struct timespec *abstime) { - int conforming; -#if __DARWIN_UNIX03 - if (__unix_conforming == 0) - __unix_conforming = 1; - -#ifdef VARIANT_CANCELABLE - conforming = 1; -#else /* !VARIANT_CANCELABLE */ - conforming = -1; -#endif /* VARIANT_CANCELABLE */ -#else /* __DARWIN_UNIX03 */ - conforming = 0; -#endif /* __DARWIN_UNIX03 */ - - return (_pthread_cond_wait(cond, mutex, abstime, 0, conforming)); + return _pthread_cond_wait(cond, mutex, abstime, 0, _pthread_conformance()); } int sigwait(const sigset_t * set, int * sig) { #if __DARWIN_UNIX03 - int err = 0; + int err = 0, conformance = _pthread_conformance(); if (__unix_conforming == 0) __unix_conforming = 1; -#ifdef VARIANT_CANCELABLE - _pthread_testcancel(pthread_self(), 1); -#endif /* VARIANT_CANCELABLE */ + if (conformance == PTHREAD_CONFORM_UNIX03_CANCELABLE) { + _pthread_testcancel(conformance); + } if (__sigwait(set, sig) == -1) { err = errno; -#ifdef VARIANT_CANCELABLE - _pthread_testcancel(pthread_self(), 1); -#endif /* VARIANT_CANCELABLE */ + if (conformance == PTHREAD_CONFORM_UNIX03_CANCELABLE) { + _pthread_testcancel(conformance); + } /* * EINTR that isn't a result of pthread_cancel() diff --git a/src/pthread_cond.c b/src/pthread_cond.c index be55e1d..79e38ba 100644 --- a/src/pthread_cond.c +++ b/src/pthread_cond.c @@ -59,7 +59,6 @@ #endif /* PLOCKSTAT */ extern int __gettimeofday(struct timeval *, struct timezone *); -extern void _pthread_testcancel(pthread_t thread, int isconforming); PTHREAD_NOEXPORT int _pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t 
*mutex, @@ -88,8 +87,8 @@ COND_GETSEQ_ADDR(_pthread_cond *cond, #ifndef BUILDING_VARIANT /* [ */ static void _pthread_cond_cleanup(void *arg); -static void _pthread_cond_updateval(_pthread_cond * cond, int error, - uint32_t updateval); +static void _pthread_cond_updateval(_pthread_cond *cond, _pthread_mutex *mutex, + int error, uint32_t updateval); int @@ -401,7 +400,7 @@ _pthread_cond_signal(pthread_cond_t *ocond, bool broadcast, mach_port_t thread) } if (updateval != (uint32_t)-1 && updateval != 0) { - _pthread_cond_updateval(cond, 0, updateval); + _pthread_cond_updateval(cond, NULL, 0, updateval); } return 0; @@ -449,8 +448,8 @@ pthread_cond_signal(pthread_cond_t *ocond) * Suspend waiting for a condition variable. * Note: we have to keep a list of condition variables which are using * this same mutex variable so we can detect invalid 'destroy' sequences. - * If isconforming < 0, we skip the _pthread_testcancel(), but keep the - * remaining conforming behavior.. + * If conformance is not cancelable, we skip the _pthread_testcancel(), + * but keep the remaining conforming behavior.. */ PTHREAD_NOEXPORT PTHREAD_NOINLINE int @@ -458,7 +457,7 @@ _pthread_cond_wait(pthread_cond_t *ocond, pthread_mutex_t *omutex, const struct timespec *abstime, int isRelative, - int isconforming) + int conforming) { int res; _pthread_cond *cond = (_pthread_cond *)ocond; @@ -477,13 +476,13 @@ _pthread_cond_wait(pthread_cond_t *ocond, return res; } - if (isconforming) { + if (conforming) { if (!_pthread_mutex_check_signature(mutex) && !_pthread_mutex_check_signature_init(mutex)) { return EINVAL; } - if (isconforming > 0) { - _pthread_testcancel(pthread_self(), 1); + if (conforming == PTHREAD_CONFORM_UNIX03_CANCELABLE) { + _pthread_testcancel(conforming); } } @@ -505,7 +504,7 @@ _pthread_cond_wait(pthread_cond_t *ocond, if (then.tv_sec < 0 || (then.tv_sec == 0 && then.tv_nsec == 0)) { return ETIMEDOUT; } - if (isconforming && + if (conforming && (abstime->tv_sec < 0 || abstime->tv_nsec < 0 || abstime->tv_nsec >= NSEC_PER_SEC)) { @@ -518,7 +517,7 @@ _pthread_cond_wait(pthread_cond_t *ocond, return ETIMEDOUT; } } - if (isconforming && (then.tv_sec < 0 || then.tv_nsec < 0)) { + if (conforming && (then.tv_sec < 0 || then.tv_nsec < 0)) { return EINVAL; } if (then.tv_nsec >= NSEC_PER_SEC) { @@ -567,10 +566,10 @@ _pthread_cond_wait(pthread_cond_t *ocond, cvlsgen = ((uint64_t)(ulval | savebits)<< 32) | nlval; // SUSv3 requires pthread_cond_wait to be a cancellation point - if (isconforming) { + if (conforming) { pthread_cleanup_push(_pthread_cond_cleanup, (void *)cond); updateval = __psynch_cvwait(ocond, cvlsgen, ucntval, (pthread_mutex_t *)npmtx, mugen, flags, (int64_t)then.tv_sec, (int32_t)then.tv_nsec); - _pthread_testcancel(pthread_self(), isconforming); + _pthread_testcancel(conforming); pthread_cleanup_pop(0); } else { updateval = __psynch_cvwait(ocond, cvlsgen, ucntval, (pthread_mutex_t *)npmtx, mugen, flags, (int64_t)then.tv_sec, (int32_t)then.tv_nsec); @@ -592,12 +591,12 @@ _pthread_cond_wait(pthread_cond_t *ocond, } // add unlock ref to show one less waiter - _pthread_cond_updateval(cond, err, 0); + _pthread_cond_updateval(cond, mutex, err, 0); } else if (updateval != 0) { // Successful wait // The return due to prepost and might have bit states // update S and return for prepo if needed - _pthread_cond_updateval(cond, 0, updateval); + _pthread_cond_updateval(cond, mutex, 0, updateval); } pthread_mutex_lock(omutex); @@ -609,25 +608,20 @@ static void _pthread_cond_cleanup(void *arg) { _pthread_cond *cond = 
(_pthread_cond *)arg; + pthread_t thread = pthread_self(); pthread_mutex_t *mutex; // 4597450: begin - pthread_t thread = pthread_self(); - int thcanceled = 0; - - _PTHREAD_LOCK(thread->lock); - thcanceled = (thread->detached & _PTHREAD_WASCANCEL); - _PTHREAD_UNLOCK(thread->lock); - - if (thcanceled == 0) { + if (!thread->canceled) { return; } - // 4597450: end + mutex = (pthread_mutex_t *)cond->busy; // add unlock ref to show one less waiter - _pthread_cond_updateval(cond, thread->cancel_error, 0); + _pthread_cond_updateval(cond, (_pthread_mutex *)mutex, + thread->cancel_error, 0); /* ** Can't do anything if this fails -- we're on the way out @@ -637,11 +631,9 @@ _pthread_cond_cleanup(void *arg) } } -#define ECVCERORR 256 -#define ECVPERORR 512 - static void -_pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval) +_pthread_cond_updateval(_pthread_cond *cond, _pthread_mutex *mutex, + int error, uint32_t updateval) { int needclearpre; @@ -653,10 +645,10 @@ _pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval) if (error != 0) { updateval = PTHRW_INC; - if ((error & ECVCERORR) != 0) { + if (error & ECVCLEARED) { updateval |= PTH_RWS_CV_CBIT; } - if ((error & ECVPERORR) != 0) { + if (error & ECVPREPOST) { updateval |= PTH_RWS_CV_PBIT; } } @@ -675,7 +667,10 @@ _pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval) oldval64 = (((uint64_t)scntval) << 32); oldval64 |= lcntval; - if (diffgen <= 0) { + PTHREAD_TRACE(psynch_cvar_updateval | DBG_FUNC_START, cond, oldval64, + updateval, 0); + + if (diffgen <= 0 && !is_rws_pbit_set(updateval)) { /* TBD: Assert, should not be the case */ /* validate it is spurious and return */ newval64 = oldval64; @@ -700,19 +695,22 @@ _pthread_cond_updateval(_pthread_cond *cond, int error, uint32_t updateval) } } while (!os_atomic_cmpxchg(c_lsseqaddr, oldval64, newval64, seq_cst)); + PTHREAD_TRACE(psynch_cvar_updateval | DBG_FUNC_END, cond, newval64, + (uint64_t)diffgen << 32 | needclearpre, 0); + if (diffgen > 0) { // if L == S, then reset associated mutex if ((nsval & PTHRW_COUNT_MASK) == (lcntval & PTHRW_COUNT_MASK)) { cond->busy = NULL; } + } - if (needclearpre != 0) { - uint32_t flags = 0; - if (cond->pshared == PTHREAD_PROCESS_SHARED) { - flags |= _PTHREAD_MTX_OPT_PSHARED; - } - (void)__psynch_cvclrprepost(cond, lcntval, ucntval, nsval, 0, lcntval, flags); + if (needclearpre) { + uint32_t flags = 0; + if (cond->pshared == PTHREAD_PROCESS_SHARED) { + flags |= _PTHREAD_MTX_OPT_PSHARED; } + (void)__psynch_cvclrprepost(cond, lcntval, ucntval, nsval, 0, lcntval, flags); } } diff --git a/src/pthread_dependency.c b/src/pthread_dependency.c new file mode 100644 index 0000000..282dfc3 --- /dev/null +++ b/src/pthread_dependency.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include "resolver.h" +#include "internal.h" +#include "dependency_private.h" +#include + +#define PREREQUISITE_FULFILLED (~0u) + +PTHREAD_NOEXPORT +void _pthread_dependency_fulfill_slow(pthread_dependency_t *pr, uint32_t old); + +OS_ALWAYS_INLINE +static inline mach_port_t +_pthread_dependency_self(void) +{ + void *v = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_MACH_THREAD_SELF); + return (mach_port_t)(uintptr_t)v; +} + +void +pthread_dependency_init_np(pthread_dependency_t *pr, pthread_t pth, + pthread_dependency_attr_t *attrs) +{ + if (attrs) *(volatile char *)attrs; + *pr = (pthread_dependency_t)PTHREAD_DEPENDENCY_INITIALIZER_NP(pth); +} + +OS_NOINLINE +void +_pthread_dependency_fulfill_slow(pthread_dependency_t *pr, uint32_t old) +{ + if (old == PREREQUISITE_FULFILLED) { + PTHREAD_CLIENT_CRASH(0, "Fufilling pthread_dependency_t twice"); + } + if (os_unlikely(old != _pthread_dependency_self())) { + PTHREAD_CLIENT_CRASH(old, "Fulfilled a dependency " + "not owned by current thread"); + } + + int ret = __ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &pr->__pdep_opaque1, 0); + switch (-ret) { + case 0: + case ENOENT: + return; + default: + PTHREAD_INTERNAL_CRASH(-ret, "__ulock_wake() failed"); + } +} + + +void +pthread_dependency_fulfill_np(pthread_dependency_t *pr, void *value) +{ + uint32_t old; + + pr->__pdep_opaque2 = (uint64_t)(uintptr_t)value; + old = os_atomic_xchg(&pr->__pdep_opaque1, PREREQUISITE_FULFILLED, release); + + if (old != 0) _pthread_dependency_fulfill_slow(pr, old); +} + +void * +pthread_dependency_wait_np(pthread_dependency_t *pr) +{ + if (os_atomic_cmpxchg(&pr->__pdep_opaque1, 0, pr->__pdep_owner, relaxed)) { + int ret; + again: + ret = __ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &pr->__pdep_opaque1, + pr->__pdep_owner, 0); + switch (-ret) { + case EFAULT: + if (pr->__pdep_opaque1 == pr->__pdep_owner) goto again; + case 0: + break; + case EOWNERDEAD: + PTHREAD_CLIENT_CRASH(pr->__pdep_owner, "Waiting on orphaned dependency"); + default: + PTHREAD_CLIENT_CRASH(-ret, "__ulock_wait() failed"); + } + } + + uint32_t cur = os_atomic_load(&pr->__pdep_opaque1, acquire); + if (cur == PREREQUISITE_FULFILLED) { + return (void *)(uintptr_t)pr->__pdep_opaque2; + } + PTHREAD_CLIENT_CRASH(cur, "Corrupted pthread_dependency_t"); +} + diff --git a/src/pthread_mutex.c b/src/pthread_mutex.c index a68503c..edc97ee 100644 --- a/src/pthread_mutex.c +++ b/src/pthread_mutex.c @@ -54,8 +54,6 @@ #include "internal.h" #include "kern/kern_trace.h" -extern int __unix_conforming; - #ifndef BUILDING_VARIANT /* [ */ #ifdef PLOCKSTAT @@ -85,31 +83,73 @@ _plockstat_never_fired(void) #define PTHREAD_MUTEX_INIT_UNUSED 1 +PTHREAD_NOEXPORT PTHREAD_WEAK +int _pthread_mutex_lock_init_slow(_pthread_mutex *mutex, bool trylock); + +PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers +int _pthread_mutex_fairshare_lock_slow(_pthread_mutex *mutex, bool trylock); + PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers -int 
_pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock); +int _pthread_mutex_firstfit_lock_slow(_pthread_mutex *mutex, bool trylock); PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers -int _pthread_mutex_unlock_slow(pthread_mutex_t *omutex); +int _pthread_mutex_fairshare_unlock_slow(_pthread_mutex *mutex); + +PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers +int _pthread_mutex_firstfit_unlock_slow(_pthread_mutex *mutex); PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers int _pthread_mutex_corruption_abort(_pthread_mutex *mutex); -extern int __pthread_mutex_default_policy PTHREAD_NOEXPORT; +extern int __pthread_mutex_default_opt_policy PTHREAD_NOEXPORT; + + +int __pthread_mutex_default_opt_policy PTHREAD_NOEXPORT = + _PTHREAD_MTX_OPT_POLICY_DEFAULT; +static inline bool +_pthread_mutex_policy_validate(int policy) +{ + return (policy >= 0 && policy < _PTHREAD_MUTEX_POLICY_LAST); +} -int __pthread_mutex_default_policy PTHREAD_NOEXPORT = - _PTHREAD_MUTEX_POLICY_FAIRSHARE; +static inline int +_pthread_mutex_policy_to_opt(int policy) +{ + switch (policy) { + case PTHREAD_MUTEX_POLICY_FAIRSHARE_NP: + return _PTHREAD_MTX_OPT_POLICY_FAIRSHARE; + case PTHREAD_MUTEX_POLICY_FIRSTFIT_NP: + return _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; + default: + __builtin_unreachable(); + } +} PTHREAD_NOEXPORT void _pthread_mutex_global_init(const char *envp[], struct _pthread_registration_data *registration_data) { + + int opt = _PTHREAD_MTX_OPT_POLICY_DEFAULT; + if (registration_data->mutex_default_policy) { + int policy = registration_data->mutex_default_policy; + if (_pthread_mutex_policy_validate(policy)) { + opt = _pthread_mutex_policy_to_opt(policy); + } + } + const char *envvar = _simple_getenv(envp, "PTHREAD_MUTEX_DEFAULT_POLICY"); - if ((envvar && (envvar[0] - '0') == _PTHREAD_MUTEX_POLICY_FIRSTFIT) || - (registration_data->mutex_default_policy == - _PTHREAD_MUTEX_POLICY_FIRSTFIT)) { - __pthread_mutex_default_policy = _PTHREAD_MUTEX_POLICY_FIRSTFIT; + if (envvar) { + int policy = envvar[0] - '0'; + if (_pthread_mutex_policy_validate(policy)) { + opt = _pthread_mutex_policy_to_opt(policy); + } + } + + if (opt != __pthread_mutex_default_opt_policy) { + __pthread_mutex_default_opt_policy = opt; } } @@ -162,7 +202,7 @@ mutex_seq_load(mutex_seq *seqaddr, mutex_seq *oldseqval) #define mutex_seq_atomic_load(seqaddr, oldseqval, m) \ mutex_seq_atomic_load_##m(seqaddr, oldseqval) -PTHREAD_ALWAYS_INLINE +PTHREAD_ALWAYS_INLINE PTHREAD_USED static inline bool mutex_seq_atomic_cmpxchgv_relaxed(mutex_seq *seqaddr, mutex_seq *oldseqval, mutex_seq *newseqval) @@ -171,7 +211,7 @@ mutex_seq_atomic_cmpxchgv_relaxed(mutex_seq *seqaddr, mutex_seq *oldseqval, newseqval->seq_LU, &oldseqval->seq_LU, relaxed); } -PTHREAD_ALWAYS_INLINE +PTHREAD_ALWAYS_INLINE PTHREAD_USED static inline bool mutex_seq_atomic_cmpxchgv_acquire(mutex_seq *seqaddr, mutex_seq *oldseqval, mutex_seq *newseqval) @@ -180,7 +220,7 @@ mutex_seq_atomic_cmpxchgv_acquire(mutex_seq *seqaddr, mutex_seq *oldseqval, newseqval->seq_LU, &oldseqval->seq_LU, acquire); } -PTHREAD_ALWAYS_INLINE +PTHREAD_ALWAYS_INLINE PTHREAD_USED static inline bool mutex_seq_atomic_cmpxchgv_release(mutex_seq *seqaddr, mutex_seq *oldseqval, mutex_seq *newseqval) @@ -274,8 +314,16 @@ pthread_mutexattr_getpolicy_np(const pthread_mutexattr_t *attr, int *policy) { int res = EINVAL; if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) { - *policy = attr->policy; - res = 0; + switch (attr->opt) { + case 
_PTHREAD_MTX_OPT_POLICY_FAIRSHARE: + *policy = PTHREAD_MUTEX_POLICY_FAIRSHARE_NP; + res = 0; + break; + case _PTHREAD_MTX_OPT_POLICY_FIRSTFIT: + *policy = PTHREAD_MUTEX_POLICY_FIRSTFIT_NP; + res = 0; + break; + } } return res; } @@ -307,7 +355,7 @@ pthread_mutexattr_init(pthread_mutexattr_t *attr) { attr->prioceiling = _PTHREAD_DEFAULT_PRIOCEILING; attr->protocol = _PTHREAD_DEFAULT_PROTOCOL; - attr->policy = __pthread_mutex_default_policy; + attr->opt = __pthread_mutex_default_opt_policy; attr->type = PTHREAD_MUTEX_DEFAULT; attr->sig = _PTHREAD_MUTEX_ATTR_SIG; attr->pshared = _PTHREAD_DEFAULT_PSHARED; @@ -349,12 +397,18 @@ pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *attr, int policy) { int res = EINVAL; if (attr->sig == _PTHREAD_MUTEX_ATTR_SIG) { + // the first-fit implementation was broken + // pre-Liberty so this mapping exists to ensure that the old first-fit + // define (2) is no longer valid when used on older systems. switch (policy) { - case _PTHREAD_MUTEX_POLICY_FAIRSHARE: - case _PTHREAD_MUTEX_POLICY_FIRSTFIT: - attr->policy = policy; - res = 0; - break; + case PTHREAD_MUTEX_POLICY_FAIRSHARE_NP: + attr->opt = _PTHREAD_MTX_OPT_POLICY_FAIRSHARE; + res = 0; + break; + case PTHREAD_MUTEX_POLICY_FIRSTFIT_NP: + attr->opt = _PTHREAD_MTX_OPT_POLICY_FIRSTFIT; + res = 0; + break; } } return res; @@ -412,6 +466,115 @@ _pthread_mutex_corruption_abort(_pthread_mutex *mutex) } +PTHREAD_NOINLINE +static int +_pthread_mutex_check_init_slow(_pthread_mutex *mutex) +{ + int res = EINVAL; + + if (_pthread_mutex_check_signature_init(mutex)) { + _PTHREAD_LOCK(mutex->lock); + if (_pthread_mutex_check_signature_init(mutex)) { + // initialize a statically initialized mutex to provide + // compatibility for misbehaving applications. + // (unlock should not be the first operation on a mutex) + res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf)); + } else if (_pthread_mutex_check_signature(mutex)) { + res = 0; + } + _PTHREAD_UNLOCK(mutex->lock); + } else if (_pthread_mutex_check_signature(mutex)) { + res = 0; + } + if (res != 0) { + PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res); + } + return res; +} + +PTHREAD_ALWAYS_INLINE +static inline int +_pthread_mutex_check_init(_pthread_mutex *mutex) +{ + int res = 0; + if (!_pthread_mutex_check_signature(mutex)) { + return _pthread_mutex_check_init_slow(mutex); + } + return res; +} + +PTHREAD_ALWAYS_INLINE +static inline bool +_pthread_mutex_is_fairshare(_pthread_mutex *mutex) +{ + return (mutex->mtxopts.options.policy == _PTHREAD_MTX_OPT_POLICY_FAIRSHARE); +} + +PTHREAD_ALWAYS_INLINE +static inline bool +_pthread_mutex_is_firstfit(_pthread_mutex *mutex) +{ + return (mutex->mtxopts.options.policy == _PTHREAD_MTX_OPT_POLICY_FIRSTFIT); +} + +PTHREAD_ALWAYS_INLINE +static inline bool +_pthread_mutex_is_recursive(_pthread_mutex *mutex) +{ + return (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE); +} + +PTHREAD_ALWAYS_INLINE +static int +_pthread_mutex_lock_handle_options(_pthread_mutex *mutex, bool trylock, + uint64_t *tidaddr) +{ + if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL) { + // NORMAL does not do EDEADLK checking + return 0; + } + + uint64_t selfid = _pthread_selfid_direct(); + if (os_atomic_load(tidaddr, relaxed) == selfid) { + if (_pthread_mutex_is_recursive(mutex)) { + if (mutex->mtxopts.options.lock_count < USHRT_MAX) { + mutex->mtxopts.options.lock_count += 1; + return mutex->mtxopts.options.lock_count; + } else { + return -EAGAIN; + } + } else if (trylock) { /* PTHREAD_MUTEX_ERRORCHECK */ + // as per OpenGroup, 
trylock cannot + // return EDEADLK on a deadlock, it should return EBUSY. + return -EBUSY; + } else { /* PTHREAD_MUTEX_ERRORCHECK */ + return -EDEADLK; + } + } + + // Not recursive, or recursive but first lock. + return 0; +} + +PTHREAD_ALWAYS_INLINE +static int +_pthread_mutex_unlock_handle_options(_pthread_mutex *mutex, uint64_t *tidaddr) +{ + if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL) { + // NORMAL does not do EDEADLK checking + return 0; + } + + uint64_t selfid = _pthread_selfid_direct(); + if (os_atomic_load(tidaddr, relaxed) != selfid) { + return -EPERM; + } else if (_pthread_mutex_is_recursive(mutex) && + --mutex->mtxopts.options.lock_count) { + return 1; + } + return 0; +} + /* * Sequence numbers and TID: * @@ -444,11 +607,9 @@ _pthread_mutex_corruption_abort(_pthread_mutex *mutex) */ PTHREAD_ALWAYS_INLINE static inline int -_pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, - uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp) +_pthread_mutex_fairshare_unlock_updatebits(_pthread_mutex *mutex, + uint32_t *flagsp, uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp) { - bool firstfit = (mutex->mtxopts.options.policy == - _PTHREAD_MUTEX_POLICY_FIRSTFIT); uint32_t flags = mutex->mtxopts.value; flags &= ~_PTHREAD_MTX_OPT_NOTIFY; // no notification by default @@ -462,27 +623,24 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, MUTEX_GETTID_ADDR(mutex, &tidaddr); uint64_t oldtid, newtid; - if (mutex->mtxopts.options.type != PTHREAD_MUTEX_NORMAL) { - uint64_t selfid = _pthread_selfid_direct(); - if (os_atomic_load(tidaddr, relaxed) != selfid) { - PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, EPERM); - return EPERM; - } else if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE && - --mutex->mtxopts.options.lock_count) { - PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1); - if (flagsp != NULL) { - *flagsp = flags; - } - return 0; + int res = _pthread_mutex_unlock_handle_options(mutex, tidaddr); + if (res > 0) { + // Valid recursive unlock + if (flagsp) { + *flagsp = flags; } + PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1); + return 0; + } else if (res < 0) { + PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, -res); + return -res; } - bool clearprepost, clearnotify, spurious; + bool clearnotify, spurious; do { newseq = oldseq; oldtid = os_atomic_load(tidaddr, relaxed); - clearprepost = false; clearnotify = false; spurious = false; @@ -504,13 +662,7 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, clearnotify = true; newtid = 0; // clear owner } else { - if (firstfit) { - // reset E bit so another can acquire meanwhile - newseq.lgenval &= ~PTH_RWL_EBIT; - newtid = 0; - } else { - newtid = PTHREAD_MTX_TID_SWITCHING; - } + newtid = PTHREAD_MTX_TID_SWITCHING; // need to signal others waiting for mutex flags |= _PTHREAD_MTX_OPT_NOTIFY; } @@ -530,21 +682,12 @@ _pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, if (clearnotify || spurious) { flags &= ~_PTHREAD_MTX_OPT_NOTIFY; - if (firstfit && (newseq.lgenval & PTH_RWL_PBIT)) { - clearprepost = true; - newseq.lgenval &= ~PTH_RWL_PBIT; - } } } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release)); PTHREAD_TRACE(psynch_mutex_unlock_updatebits, mutex, oldseq.lgenval, newseq.lgenval, oldtid); - if (clearprepost) { - __psynch_cvclrprepost(mutex, newseq.lgenval, newseq.ugenval, 0, 0, - newseq.lgenval, flags | _PTHREAD_MTX_OPT_MUTEX); - } - if (mgenp != NULL) { *mgenp = newseq.lgenval; } @@ -561,20 +704,11 @@ 
_pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, return 0; } -PTHREAD_NOEXPORT PTHREAD_NOINLINE -int -_pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t *flagsp, - uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp) -{ - return _pthread_mutex_unlock_updatebits(mutex, flagsp, pmtxp, mgenp, ugenp); -} - PTHREAD_ALWAYS_INLINE static inline int -_pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid) +_pthread_mutex_fairshare_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid) { - bool firstfit = (mutex->mtxopts.options.policy == - _PTHREAD_MUTEX_POLICY_FIRSTFIT); + bool firstfit = _pthread_mutex_is_firstfit(mutex); bool gotlock = true; mutex_seq *seqaddr; @@ -585,11 +719,9 @@ _pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid) uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); - uint64_t oldtid; do { newseq = oldseq; - oldtid = os_atomic_load(tidaddr, relaxed); if (firstfit) { // firstfit locks can have the lock stolen out from under a locker @@ -605,17 +737,14 @@ _pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid) newseq.lgenval |= PTH_RWL_KBIT | PTH_RWL_EBIT; } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, - relaxed)); + acquire)); if (gotlock) { - if (!os_atomic_cmpxchg(tidaddr, oldtid, selfid, relaxed)) { - // we own this mutex, nobody should be updating it except us - return _pthread_mutex_corruption_abort(mutex); - } + os_atomic_store(tidaddr, selfid, relaxed); } PTHREAD_TRACE(psynch_mutex_lock_updatebits, mutex, oldseq.lgenval, - newseq.lgenval, oldtid); + newseq.lgenval, 0); // failing to take the lock in firstfit returns 1 to force the caller // to wait in the kernel @@ -624,114 +753,36 @@ _pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid) PTHREAD_NOINLINE static int -_pthread_mutex_markprepost(_pthread_mutex *mutex, uint32_t updateval) -{ - mutex_seq *seqaddr; - MUTEX_GETSEQ_ADDR(mutex, &seqaddr); - - mutex_seq oldseq, newseq; - mutex_seq_load(seqaddr, &oldseq); - - bool clearprepost; - do { - clearprepost = false; - newseq = oldseq; - - /* update the bits */ - if ((oldseq.lgenval & PTHRW_COUNT_MASK) == - (oldseq.ugenval & PTHRW_COUNT_MASK)) { - clearprepost = true; - newseq.lgenval &= ~PTH_RWL_PBIT; - } else { - newseq.lgenval |= PTH_RWL_PBIT; - } - } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, relaxed)); - - if (clearprepost) { - __psynch_cvclrprepost(mutex, newseq.lgenval, newseq.ugenval, 0, 0, - newseq.lgenval, mutex->mtxopts.value | _PTHREAD_MTX_OPT_MUTEX); - } - - return 0; -} - -PTHREAD_NOINLINE -static int -_pthread_mutex_check_init_slow(pthread_mutex_t *omutex) -{ - int res = EINVAL; - _pthread_mutex *mutex = (_pthread_mutex *)omutex; - - if (_pthread_mutex_check_signature_init(mutex)) { - _PTHREAD_LOCK(mutex->lock); - if (_pthread_mutex_check_signature_init(mutex)) { - // initialize a statically initialized mutex to provide - // compatibility for misbehaving applications. 
- // (unlock should not be the first operation on a mutex) - res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf)); - } else if (_pthread_mutex_check_signature(mutex)) { - res = 0; - } - _PTHREAD_UNLOCK(mutex->lock); - } else if (_pthread_mutex_check_signature(mutex)) { - res = 0; - } - if (res != 0) { - PLOCKSTAT_MUTEX_ERROR(omutex, res); - } - return res; -} - -PTHREAD_ALWAYS_INLINE -static inline int -_pthread_mutex_check_init(pthread_mutex_t *omutex) -{ - int res = 0; - _pthread_mutex *mutex = (_pthread_mutex *)omutex; - - if (!_pthread_mutex_check_signature(mutex)) { - return _pthread_mutex_check_init_slow(omutex); - } - return res; -} - -PTHREAD_NOINLINE -static int -_pthread_mutex_lock_wait(pthread_mutex_t *omutex, mutex_seq newseq, +_pthread_mutex_fairshare_lock_wait(_pthread_mutex *mutex, mutex_seq newseq, uint64_t oldtid) { - _pthread_mutex *mutex = (_pthread_mutex *)omutex; - uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); uint64_t selfid = _pthread_selfid_direct(); - PLOCKSTAT_MUTEX_BLOCK(omutex); + PLOCKSTAT_MUTEX_BLOCK((pthread_mutex_t *)mutex); do { uint32_t updateval; do { - updateval = __psynch_mutexwait(omutex, newseq.lgenval, + updateval = __psynch_mutexwait(mutex, newseq.lgenval, newseq.ugenval, oldtid, mutex->mtxopts.value); oldtid = os_atomic_load(tidaddr, relaxed); } while (updateval == (uint32_t)-1); // returns 0 on succesful update; in firstfit it may fail with 1 - } while (_pthread_mutex_lock_updatebits(mutex, selfid) == 1); - PLOCKSTAT_MUTEX_BLOCKED(omutex, BLOCK_SUCCESS_PLOCKSTAT); + } while (_pthread_mutex_fairshare_lock_updatebits(mutex, selfid) == 1); + PLOCKSTAT_MUTEX_BLOCKED((pthread_mutex_t *)mutex, BLOCK_SUCCESS_PLOCKSTAT); return 0; } PTHREAD_NOEXPORT PTHREAD_NOINLINE int -_pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock) +_pthread_mutex_fairshare_lock_slow(_pthread_mutex *omutex, bool trylock) { int res, recursive = 0; _pthread_mutex *mutex = (_pthread_mutex *)omutex; - res = _pthread_mutex_check_init(omutex); - if (res != 0) return res; - mutex_seq *seqaddr; MUTEX_GETSEQ_ADDR(mutex, &seqaddr); @@ -742,25 +793,14 @@ _pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock) MUTEX_GETTID_ADDR(mutex, &tidaddr); uint64_t oldtid, selfid = _pthread_selfid_direct(); - if (mutex->mtxopts.options.type != PTHREAD_MUTEX_NORMAL) { - if (os_atomic_load(tidaddr, relaxed) == selfid) { - if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) { - if (mutex->mtxopts.options.lock_count < USHRT_MAX) { - mutex->mtxopts.options.lock_count++; - recursive = 1; - res = 0; - } else { - res = EAGAIN; - } - } else if (trylock) { /* PTHREAD_MUTEX_ERRORCHECK */ - // as per OpenGroup, trylock cannot - // return EDEADLK on a deadlock, it should return EBUSY. 
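The OpenGroup rule restated in the comment above is directly observable: relocking an ERRORCHECK mutex from its owning thread fails with EDEADLK, while pthread_mutex_trylock in the same situation reports EBUSY. A small sketch of that behavior using only standard POSIX calls (not code from this patch):

#include <assert.h>
#include <errno.h>
#include <pthread.h>

int
main(void)
{
	pthread_mutexattr_t attr;
	pthread_mutex_t m;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
	pthread_mutex_init(&m, &attr);
	pthread_mutexattr_destroy(&attr);

	assert(pthread_mutex_lock(&m) == 0);
	/* the owner relocking is detected rather than deadlocking ... */
	assert(pthread_mutex_lock(&m) == EDEADLK);
	/* ... but trylock must report "busy", never "deadlock" */
	assert(pthread_mutex_trylock(&m) == EBUSY);

	assert(pthread_mutex_unlock(&m) == 0);
	pthread_mutex_destroy(&m);
	return 0;
}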
- res = EBUSY; - } else { /* PTHREAD_MUTEX_ERRORCHECK */ - res = EDEADLK; - } - goto out; - } + res = _pthread_mutex_lock_handle_options(mutex, trylock, tidaddr); + if (res > 0) { + recursive = 1; + res = 0; + goto out; + } else if (res < 0) { + res = -res; + goto out; } bool gotlock; @@ -797,44 +837,39 @@ _pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock) } else { PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_START, omutex, newseq.lgenval, newseq.ugenval, oldtid); - res = _pthread_mutex_lock_wait(omutex, newseq, oldtid); + res = _pthread_mutex_fairshare_lock_wait(mutex, newseq, oldtid); PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_END, omutex, newseq.lgenval, newseq.ugenval, oldtid); } - if (res == 0 && mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) { + if (res == 0 && _pthread_mutex_is_recursive(mutex)) { mutex->mtxopts.options.lock_count = 1; } out: #if PLOCKSTAT if (res == 0) { - PLOCKSTAT_MUTEX_ACQUIRE(omutex, recursive, 0); + PLOCKSTAT_MUTEX_ACQUIRE((pthread_mutex_t *)mutex, recursive, 0); } else { - PLOCKSTAT_MUTEX_ERROR(omutex, res); + PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res); } #endif return res; } -PTHREAD_ALWAYS_INLINE +PTHREAD_NOINLINE static inline int -_pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) +_pthread_mutex_fairshare_lock(_pthread_mutex *mutex, bool trylock) { #if ENABLE_USERSPACE_TRACE - return _pthread_mutex_lock_slow(omutex, trylock); + return _pthread_mutex_fairshare_lock_slow(mutex, trylock); #elif PLOCKSTAT if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) { - return _pthread_mutex_lock_slow(omutex, trylock); + return _pthread_mutex_fairshare_lock_slow(mutex, trylock); } #endif - _pthread_mutex *mutex = (_pthread_mutex *)omutex; - if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) { - return _pthread_mutex_lock_slow(omutex, trylock); - } - uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); uint64_t selfid = _pthread_selfid_direct(); @@ -846,7 +881,7 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) mutex_seq_load(seqaddr, &oldseq); if (os_unlikely(oldseq.lgenval & PTH_RWL_EBIT)) { - return _pthread_mutex_lock_slow(omutex, trylock); + return _pthread_mutex_fairshare_lock_slow(mutex, trylock); } bool gotlock; @@ -865,7 +900,7 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) newseq.lgenval += PTHRW_INC; newseq.lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT; } else { - return _pthread_mutex_lock_slow(omutex, trylock); + return _pthread_mutex_fairshare_lock_slow(mutex, trylock); } } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire))); @@ -880,45 +915,24 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) } } -PTHREAD_NOEXPORT_VARIANT -int -pthread_mutex_lock(pthread_mutex_t *mutex) -{ - return _pthread_mutex_lock(mutex, false); -} - -PTHREAD_NOEXPORT_VARIANT -int -pthread_mutex_trylock(pthread_mutex_t *mutex) -{ - return _pthread_mutex_lock(mutex, true); -} - -/* - * Unlock a mutex. 
- * TODO: Priority inheritance stuff - */ - PTHREAD_NOINLINE static int -_pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq, +_pthread_mutex_fairshare_unlock_drop(_pthread_mutex *mutex, mutex_seq newseq, uint32_t flags) { int res; - _pthread_mutex *mutex = (_pthread_mutex *)omutex; - uint32_t updateval; uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); - PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_START, omutex, newseq.lgenval, + PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_START, mutex, newseq.lgenval, newseq.ugenval, os_atomic_load(tidaddr, relaxed)); - updateval = __psynch_mutexdrop(omutex, newseq.lgenval, newseq.ugenval, + updateval = __psynch_mutexdrop(mutex, newseq.lgenval, newseq.ugenval, os_atomic_load(tidaddr, relaxed), flags); - PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_END, omutex, updateval, 0, 0); + PTHREAD_TRACE(psynch_mutex_uunlock | DBG_FUNC_END, mutex, updateval, 0, 0); if (updateval == (uint32_t)-1) { res = errno; @@ -930,9 +944,6 @@ _pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq, PTHREAD_ABORT("__psynch_mutexdrop failed with error %d", res); } return res; - } else if ((mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT) - && (updateval & PTH_RWL_PBIT)) { - return _pthread_mutex_markprepost(mutex, updateval); } return 0; @@ -940,49 +951,39 @@ _pthread_mutex_unlock_drop(pthread_mutex_t *omutex, mutex_seq newseq, PTHREAD_NOEXPORT PTHREAD_NOINLINE int -_pthread_mutex_unlock_slow(pthread_mutex_t *omutex) +_pthread_mutex_fairshare_unlock_slow(_pthread_mutex *mutex) { int res; - _pthread_mutex *mutex = (_pthread_mutex *)omutex; mutex_seq newseq; uint32_t flags; - // Initialize static mutexes for compatibility with misbehaving - // applications (unlock should not be the first operation on a mutex). 
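The compatibility path noted above (now centralized in _pthread_mutex_check_init_slow earlier in this diff) means a statically initialized mutex is only fully set up on its first use. The supported pattern is the usual one; a minimal sketch, with counter_lock and counter as illustrative names:

#include <pthread.h>

static pthread_mutex_t counter_lock = PTHREAD_MUTEX_INITIALIZER; /* completed lazily on first use */
static long counter;

long
counter_increment(void)
{
	long value;
	pthread_mutex_lock(&counter_lock);   /* first operation finishes the initialization */
	value = ++counter;
	pthread_mutex_unlock(&counter_lock);
	return value;
}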
- res = _pthread_mutex_check_init(omutex); - if (res != 0) return res; - - res = _pthread_mutex_unlock_updatebits(mutex, &flags, NULL, &newseq.lgenval, - &newseq.ugenval); + res = _pthread_mutex_fairshare_unlock_updatebits(mutex, &flags, NULL, + &newseq.lgenval, &newseq.ugenval); if (res != 0) return res; if ((flags & _PTHREAD_MTX_OPT_NOTIFY) != 0) { - return _pthread_mutex_unlock_drop(omutex, newseq, flags); + return _pthread_mutex_fairshare_unlock_drop(mutex, newseq, flags); } else { uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); - PTHREAD_TRACE(psynch_mutex_uunlock, omutex, newseq.lgenval, + PTHREAD_TRACE(psynch_mutex_uunlock, mutex, newseq.lgenval, newseq.ugenval, os_atomic_load(tidaddr, relaxed)); } return 0; } -PTHREAD_NOEXPORT_VARIANT -int -pthread_mutex_unlock(pthread_mutex_t *omutex) +PTHREAD_NOINLINE +static int +_pthread_mutex_fairshare_unlock(_pthread_mutex *mutex) { #if ENABLE_USERSPACE_TRACE - return _pthread_mutex_unlock_slow(omutex); + return _pthread_mutex_fairshare_unlock_slow(mutex); #elif PLOCKSTAT if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) { - return _pthread_mutex_unlock_slow(omutex); + return _pthread_mutex_fairshare_unlock_slow(mutex); } #endif - _pthread_mutex *mutex = (_pthread_mutex *)omutex; - if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) { - return _pthread_mutex_unlock_slow(omutex); - } uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); @@ -1012,13 +1013,15 @@ pthread_mutex_unlock(pthread_mutex_t *omutex) if (os_likely((oldseq.lgenval & PTHRW_COUNT_MASK) == (newseq.ugenval & PTHRW_COUNT_MASK))) { - // our unlock sequence matches to lock sequence, so if the - // CAS is successful, the mutex is unlocked + // if we succeed in performing the CAS we can be sure of a fast + // path (only needing the CAS) unlock, if: + // a. our lock and unlock sequence are equal + // b. we don't need to clear an unlock prepost from the kernel // do not reset Ibit, just K&E newseq.lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT); } else { - return _pthread_mutex_unlock_slow(omutex); + return _pthread_mutex_fairshare_unlock_slow(mutex); } } while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release))); @@ -1026,6 +1029,468 @@ pthread_mutex_unlock(pthread_mutex_t *omutex) return 0; } +#pragma mark firstfit + +PTHREAD_ALWAYS_INLINE +static inline int +_pthread_mutex_firstfit_unlock_updatebits(_pthread_mutex *mutex, + uint32_t *flagsp, uint32_t **mutexp, uint32_t *lvalp, uint32_t *uvalp) +{ + uint32_t flags = mutex->mtxopts.value & ~_PTHREAD_MTX_OPT_NOTIFY; + bool kernel_wake; + + mutex_seq *seqaddr; + MUTEX_GETSEQ_ADDR(mutex, &seqaddr); + + mutex_seq oldseq, newseq; + mutex_seq_load(seqaddr, &oldseq); + + uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + uint64_t oldtid; + + int res = _pthread_mutex_unlock_handle_options(mutex, tidaddr); + if (res > 0) { + // Valid recursive unlock + if (flagsp) { + *flagsp = flags; + } + PLOCKSTAT_MUTEX_RELEASE((pthread_mutex_t *)mutex, 1); + return 0; + } else if (res < 0) { + PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, -res); + return -res; + } + + do { + newseq = oldseq; + oldtid = os_atomic_load(tidaddr, relaxed); + // More than one kernel waiter means we need to do a wake. + kernel_wake = diff_genseq(oldseq.lgenval, oldseq.ugenval) > 0; + newseq.lgenval &= ~PTH_RWL_EBIT; + + if (kernel_wake) { + // Going to the kernel post-unlock removes a single waiter unlock + // from the mutex counts. 
+ newseq.ugenval += PTHRW_INC; + } + + if (oldtid != 0) { + if (!os_atomic_cmpxchg(tidaddr, oldtid, 0, relaxed)) { + return _pthread_mutex_corruption_abort(mutex); + } + } + } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, release)); + + PTHREAD_TRACE(psynch_ffmutex_unlock_updatebits, mutex, oldseq.lgenval, + newseq.lgenval, newseq.ugenval); + + if (kernel_wake) { + // We choose to return this out via flags because the condition + // variable also uses this to determine whether to do a kernel wake + // when beginning a cvwait. + flags |= _PTHREAD_MTX_OPT_NOTIFY; + } + if (lvalp) { + *lvalp = newseq.lgenval; + } + if (uvalp) { + *uvalp = newseq.ugenval; + } + if (mutexp) { + *mutexp = (uint32_t *)mutex; + } + if (flagsp) { + *flagsp = flags; + } + return 0; +} + +PTHREAD_NOEXPORT PTHREAD_NOINLINE +static int +_pthread_mutex_firstfit_wake(_pthread_mutex *mutex, mutex_seq newseq, + uint32_t flags) +{ + PTHREAD_TRACE(psynch_ffmutex_wake, mutex, newseq.lgenval, newseq.ugenval, + 0); + int res = __psynch_mutexdrop(mutex, newseq.lgenval, newseq.ugenval, 0, + flags); + + if (res == -1) { + res = errno; + if (res == EINTR) { + res = 0; + } + if (res != 0) { + PTHREAD_ABORT("__psynch_mutexdrop failed with error %d", res); + } + return res; + } + return 0; +} + +PTHREAD_NOEXPORT PTHREAD_NOINLINE +int +_pthread_mutex_firstfit_unlock_slow(_pthread_mutex *mutex) +{ + mutex_seq newseq; + uint32_t flags; + int res; + + res = _pthread_mutex_firstfit_unlock_updatebits(mutex, &flags, NULL, + &newseq.lgenval, &newseq.ugenval); + if (res != 0) return res; + + if (flags & _PTHREAD_MTX_OPT_NOTIFY) { + return _pthread_mutex_firstfit_wake(mutex, newseq, flags); + } + return 0; +} + +PTHREAD_ALWAYS_INLINE +static bool +_pthread_mutex_firstfit_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid, + mutex_seq *newseqp) +{ + bool gotlock; + + mutex_seq *seqaddr; + MUTEX_GETSEQ_ADDR(mutex, &seqaddr); + + mutex_seq oldseq, newseq; + mutex_seq_load(seqaddr, &oldseq); + + uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + + PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_START, mutex, + oldseq.lgenval, oldseq.ugenval, 0); + + do { + newseq = oldseq; + gotlock = is_rwl_ebit_clear(oldseq.lgenval); + + if (gotlock) { + // If we see the E-bit cleared, we should just attempt to take it. + newseq.lgenval |= PTH_RWL_EBIT; + } else { + // If we failed to get the lock then we need to put ourselves back + // in the queue of waiters. The previous unlocker that woke us out + // of the kernel consumed the S-count for our previous wake. So + // take another ticket on L and go back in the kernel to sleep. 
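The first-fit state machine implemented in the hunks above can also be requested per mutex through the reworked policy attribute (PTHREAD_MUTEX_POLICY_FIRSTFIT_NP / PTHREAD_MUTEX_POLICY_FAIRSHARE_NP, with the process-wide default additionally steerable via the PTHREAD_MUTEX_DEFAULT_POLICY environment variable handled in _pthread_mutex_global_init). A hedged sketch, assuming the SPI is picked up from <pthread/pthread_spis.h> as installed by this project:

#include <pthread.h>
#include <pthread/pthread_spis.h>   /* assumed install path of the policy SPI */

int
make_firstfit_mutex(pthread_mutex_t *m)
{
	pthread_mutexattr_t attr;
	int res = pthread_mutexattr_init(&attr);
	if (res != 0) return res;

	/* first-fit: an unlocked mutex goes to whichever thread grabs it next,
	 * instead of being handed to the longest waiter (fairshare) */
	res = pthread_mutexattr_setpolicy_np(&attr, PTHREAD_MUTEX_POLICY_FIRSTFIT_NP);
	if (res == 0) {
		res = pthread_mutex_init(m, &attr);
	}
	pthread_mutexattr_destroy(&attr);
	return res;
}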
+ newseq.lgenval += PTHRW_INC; + } + } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire)); + + if (gotlock) { + os_atomic_store(tidaddr, selfid, relaxed); + } + + PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_END, mutex, + newseq.lgenval, newseq.ugenval, 0); + + if (newseqp) { + *newseqp = newseq; + } + return gotlock; +} + +PTHREAD_NOINLINE +static int +_pthread_mutex_firstfit_lock_wait(_pthread_mutex *mutex, mutex_seq newseq, + uint64_t oldtid) +{ + uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + uint64_t selfid = _pthread_selfid_direct(); + + PLOCKSTAT_MUTEX_BLOCK((pthread_mutex_t *)mutex); + do { + uint32_t uval; + do { + PTHREAD_TRACE(psynch_ffmutex_wait | DBG_FUNC_START, mutex, + newseq.lgenval, newseq.ugenval, mutex->mtxopts.value); + uval = __psynch_mutexwait(mutex, newseq.lgenval, newseq.ugenval, + oldtid, mutex->mtxopts.value); + PTHREAD_TRACE(psynch_ffmutex_wait | DBG_FUNC_END, mutex, + uval, 0, 0); + oldtid = os_atomic_load(tidaddr, relaxed); + } while (uval == (uint32_t)-1); + } while (!_pthread_mutex_firstfit_lock_updatebits(mutex, selfid, &newseq)); + PLOCKSTAT_MUTEX_BLOCKED((pthread_mutex_t *)mutex, BLOCK_SUCCESS_PLOCKSTAT); + + return 0; +} + +PTHREAD_NOEXPORT PTHREAD_NOINLINE +int +_pthread_mutex_firstfit_lock_slow(_pthread_mutex *mutex, bool trylock) +{ + int res, recursive = 0; + + mutex_seq *seqaddr; + MUTEX_GETSEQ_ADDR(mutex, &seqaddr); + + mutex_seq oldseq, newseq; + mutex_seq_load(seqaddr, &oldseq); + + uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + uint64_t oldtid, selfid = _pthread_selfid_direct(); + + res = _pthread_mutex_lock_handle_options(mutex, trylock, tidaddr); + if (res > 0) { + recursive = 1; + res = 0; + goto out; + } else if (res < 0) { + res = -res; + goto out; + } + + PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_START, mutex, + oldseq.lgenval, oldseq.ugenval, 0); + + bool gotlock; + do { + newseq = oldseq; + oldtid = os_atomic_load(tidaddr, relaxed); + + gotlock = is_rwl_ebit_clear(oldseq.lgenval); + if (trylock && !gotlock) { + // We still want to perform the CAS here, even though it won't + // do anything so that it fails if someone unlocked while we were + // in the loop + } else if (gotlock) { + // In first-fit, getting the lock simply adds the E-bit + newseq.lgenval |= PTH_RWL_EBIT; + } else { + // Failed to get the lock, increment the L-val and go to + // the kernel to sleep + newseq.lgenval += PTHRW_INC; + } + } while (!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq, acquire)); + + PTHREAD_TRACE(psynch_ffmutex_lock_updatebits | DBG_FUNC_END, mutex, + newseq.lgenval, newseq.ugenval, 0); + + if (gotlock) { + os_atomic_store(tidaddr, selfid, relaxed); + res = 0; + PTHREAD_TRACE(psynch_mutex_ulock, mutex, newseq.lgenval, + newseq.ugenval, selfid); + } else if (trylock) { + res = EBUSY; + PTHREAD_TRACE(psynch_mutex_utrylock_failed, mutex, newseq.lgenval, + newseq.ugenval, oldtid); + } else { + PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_START, mutex, + newseq.lgenval, newseq.ugenval, oldtid); + res = _pthread_mutex_firstfit_lock_wait(mutex, newseq, oldtid); + PTHREAD_TRACE(psynch_mutex_ulock | DBG_FUNC_END, mutex, + newseq.lgenval, newseq.ugenval, oldtid); + } + + if (res == 0 && _pthread_mutex_is_recursive(mutex)) { + mutex->mtxopts.options.lock_count = 1; + } + +out: +#if PLOCKSTAT + if (res == 0) { + PLOCKSTAT_MUTEX_ACQUIRE((pthread_mutex_t *)mutex, recursive, 0); + } else { + PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, res); + } +#endif + return res; +} + +#pragma mark fast 
path + +PTHREAD_NOEXPORT PTHREAD_NOINLINE +int +_pthread_mutex_droplock(_pthread_mutex *mutex, uint32_t *flagsp, + uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp) +{ + if (_pthread_mutex_is_fairshare(mutex)) { + return _pthread_mutex_fairshare_unlock_updatebits(mutex, flagsp, + pmtxp, mgenp, ugenp); + } + return _pthread_mutex_firstfit_unlock_updatebits(mutex, flagsp, pmtxp, + mgenp, ugenp); +} + +PTHREAD_NOEXPORT PTHREAD_NOINLINE +int +_pthread_mutex_lock_init_slow(_pthread_mutex *mutex, bool trylock) +{ + int res; + + res = _pthread_mutex_check_init(mutex); + if (res != 0) return res; + + if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) { + return _pthread_mutex_fairshare_lock_slow(mutex, trylock); + } + return _pthread_mutex_firstfit_lock_slow(mutex, trylock); +} + +PTHREAD_NOEXPORT PTHREAD_NOINLINE +static int +_pthread_mutex_unlock_init_slow(_pthread_mutex *mutex) +{ + int res; + + // Initialize static mutexes for compatibility with misbehaving + // applications (unlock should not be the first operation on a mutex). + res = _pthread_mutex_check_init(mutex); + if (res != 0) return res; + + if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) { + return _pthread_mutex_fairshare_unlock_slow(mutex); + } + return _pthread_mutex_firstfit_unlock_slow(mutex); +} + +PTHREAD_NOEXPORT_VARIANT +int +pthread_mutex_unlock(pthread_mutex_t *omutex) +{ + _pthread_mutex *mutex = (_pthread_mutex *)omutex; + if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) { + return _pthread_mutex_unlock_init_slow(mutex); + } + + if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) { + return _pthread_mutex_fairshare_unlock(mutex); + } + +#if ENABLE_USERSPACE_TRACE + return _pthread_mutex_firstfit_unlock_slow(mutex); +#elif PLOCKSTAT + if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) { + return _pthread_mutex_firstfit_unlock_slow(mutex); + } +#endif + + /* + * This is the first-fit fast path. The fairshare fast-ish path is in + * _pthread_mutex_firstfit_unlock() + */ + uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + + mutex_seq *seqaddr; + MUTEX_GETSEQ_ADDR(mutex, &seqaddr); + + mutex_seq oldseq, newseq; + mutex_seq_load(seqaddr, &oldseq); + + // We're giving up the mutex one way or the other, so go ahead and + // update the owner to 0 so that once the CAS below succeeds, there + // is no stale ownership information. If the CAS of the seqaddr + // fails, we may loop, but it's still valid for the owner to be + // SWITCHING/0 + os_atomic_store(tidaddr, 0, relaxed); + + do { + newseq = oldseq; + + if (diff_genseq(oldseq.lgenval, oldseq.ugenval) == 0) { + // No outstanding waiters in kernel, we can simply drop the E-bit + // and return. 
+			newseq.lgenval &= ~PTH_RWL_EBIT;
+		} else {
+			return _pthread_mutex_firstfit_unlock_slow(mutex);
+		}
+	} while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+			release)));
+
+	return 0;
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline int
+_pthread_mutex_firstfit_lock(pthread_mutex_t *omutex, bool trylock)
+{
+	_pthread_mutex *mutex = (_pthread_mutex *)omutex;
+	if (os_unlikely(!_pthread_mutex_check_signature_fast(mutex))) {
+		return _pthread_mutex_lock_init_slow(mutex, trylock);
+	}
+
+	if (os_unlikely(_pthread_mutex_is_fairshare(mutex))) {
+		return _pthread_mutex_fairshare_lock(mutex, trylock);
+	}
+
+#if ENABLE_USERSPACE_TRACE
+	return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+#elif PLOCKSTAT
+	if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED()) {
+		return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+	}
+#endif
+
+	/*
+	 * This is the first-fit fast path. The fairshare fast-ish path is in
+	 * _pthread_mutex_fairshare_lock()
+	 */
+	uint64_t *tidaddr;
+	MUTEX_GETTID_ADDR(mutex, &tidaddr);
+	uint64_t selfid = _pthread_selfid_direct();
+
+	mutex_seq *seqaddr;
+	MUTEX_GETSEQ_ADDR(mutex, &seqaddr);
+
+	mutex_seq oldseq, newseq;
+	mutex_seq_load(seqaddr, &oldseq);
+
+	if (os_unlikely(oldseq.lgenval & PTH_RWL_EBIT)) {
+		return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+	}
+
+	bool gotlock;
+	do {
+		newseq = oldseq;
+		gotlock = is_rwl_ebit_clear(oldseq.lgenval);
+
+		if (trylock && !gotlock) {
+			// A trylock on a held lock will fail immediately. But since
+			// we did not load the sequence words atomically, perform a
+			// no-op CAS64 to ensure that nobody has unlocked concurrently.
+		} else if (os_likely(gotlock)) {
+			// In first-fit, getting the lock simply adds the E-bit
+			newseq.lgenval |= PTH_RWL_EBIT;
+		} else {
+			return _pthread_mutex_firstfit_lock_slow(mutex, trylock);
+		}
+	} while (os_unlikely(!mutex_seq_atomic_cmpxchgv(seqaddr, &oldseq, &newseq,
+			acquire)));
+
+	if (os_likely(gotlock)) {
+		os_atomic_store(tidaddr, selfid, relaxed);
+		return 0;
+	} else if (trylock) {
+		return EBUSY;
+	} else {
+		__builtin_trap();
+	}
+}
+
+PTHREAD_NOEXPORT_VARIANT
+int
+pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+	return _pthread_mutex_firstfit_lock(mutex, false);
+}
+
+PTHREAD_NOEXPORT_VARIANT
+int
+pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+	return _pthread_mutex_firstfit_lock(mutex, true);
+}
+
 PTHREAD_ALWAYS_INLINE
 static inline int
@@ -1040,7 +1505,7 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr,
 	}
 	mutex->prioceiling = (int16_t)attr->prioceiling;
 	mutex->mtxopts.options.protocol = attr->protocol;
-	mutex->mtxopts.options.policy = attr->policy;
+	mutex->mtxopts.options.policy = attr->opt;
 	mutex->mtxopts.options.type = attr->type;
 	mutex->mtxopts.options.pshared = attr->pshared;
 	} else {
@@ -1063,9 +1528,9 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr,
 	mutex->prioceiling = _PTHREAD_DEFAULT_PRIOCEILING;
 	mutex->mtxopts.options.protocol = _PTHREAD_DEFAULT_PROTOCOL;
 	if (static_type != 3) {
-		mutex->mtxopts.options.policy = __pthread_mutex_default_policy;
+		mutex->mtxopts.options.policy = __pthread_mutex_default_opt_policy;
 	} else {
-		mutex->mtxopts.options.policy = _PTHREAD_MUTEX_POLICY_FIRSTFIT;
+		mutex->mtxopts.options.policy = _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
 	}
 	mutex->mtxopts.options.pshared = _PTHREAD_DEFAULT_PSHARED;
 	}
@@ -1089,7 +1554,8 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr,
 	long sig = _PTHREAD_MUTEX_SIG;
 	if
(mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL && - mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FAIRSHARE) { + (_pthread_mutex_is_fairshare(mutex) || + _pthread_mutex_is_firstfit(mutex))) { // rdar://18148854 _pthread_mutex_lock & pthread_mutex_unlock fastpath sig = _PTHREAD_MUTEX_SIG_fast; } diff --git a/src/pthread_rwlock.c b/src/pthread_rwlock.c index 85358df..5b0bc9a 100644 --- a/src/pthread_rwlock.c +++ b/src/pthread_rwlock.c @@ -61,8 +61,6 @@ #include // for bzero #endif -extern int __unix_conforming; - #ifdef PLOCKSTAT #include "plockstat.h" #else /* !PLOCKSTAT */ @@ -513,7 +511,7 @@ _pthread_rwlock_updateval(_pthread_rwlock *rwlock, uint32_t updateval) rwlock_seq_load(seqaddr, &oldseq, RWLOCK_SEQ_LS); do { newseq = oldseq; - if (isoverlap || is_rws_setunlockinit(oldseq.rw_seq) != 0) { + if (isoverlap || is_rws_unlockinit_set(oldseq.rw_seq)) { // Set S word to the specified value uint32_t savebits = (oldseq.rw_seq & PTHRW_RWS_SAVEMASK); newseq.lcntval = _pthread_rwlock_modbits(oldseq.lcntval, updateval, @@ -763,7 +761,7 @@ retry: newseq.lcntval |= PTH_RWL_KBIT | PTH_RWL_WBIT; } newseq.lcntval += PTHRW_INC; - if (is_rws_setseq(oldseq.rw_seq)) { + if (is_rws_sbit_set(oldseq.rw_seq)) { // Clear the S bit and set S to L newseq.rw_seq &= (PTHRW_BIT_MASK & ~PTH_RWS_SBIT); newseq.rw_seq |= (oldseq.lcntval & PTHRW_COUNT_MASK); diff --git a/src/pthread_tsd.c b/src/pthread_tsd.c index 3a77266..54b1bb0 100644 --- a/src/pthread_tsd.c +++ b/src/pthread_tsd.c @@ -61,12 +61,13 @@ // __pthread_tsd_end is the end of dynamic keys. static const int __pthread_tsd_first = __TSD_RESERVED_MAX + 1; -static int __pthread_tsd_max = __pthread_tsd_first; static const int __pthread_tsd_start = _INTERNAL_POSIX_THREAD_KEYS_MAX; static const int __pthread_tsd_end = _INTERNAL_POSIX_THREAD_KEYS_END; -static int __pthread_key_legacy_behaviour = 0; -static int __pthread_key_legacy_behaviour_log = 0; +static int __pthread_tsd_max = __pthread_tsd_first; +static _pthread_lock __pthread_tsd_lock = _PTHREAD_LOCK_INITIALIZER; +static bool __pthread_key_legacy_behaviour = 0; +static bool __pthread_key_legacy_behaviour_log = 0; // Omit support for pthread key destructors in the static archive for dyld. // dyld does not create and destroy threads so these are not necessary. @@ -80,15 +81,17 @@ static struct { uintptr_t destructor; } _pthread_keys[_INTERNAL_POSIX_THREAD_KEYS_END]; -static _pthread_lock tsd_lock = _PTHREAD_LOCK_INITIALIZER; - // The pthread_tsd destruction order can be reverted to the old (pre-10.11) order // by setting this environment variable. void _pthread_key_global_init(const char *envp[]) { - __pthread_key_legacy_behaviour = _simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER") ? 1 : 0; - __pthread_key_legacy_behaviour_log = _simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER_LOG") ? 1 : 0; + if (_simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER")) { + __pthread_key_legacy_behaviour = true; + } + if (_simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER_LOG")) { + __pthread_key_legacy_behaviour_log = true; + } } // Returns true if successful, false if destructor was already set. @@ -133,7 +136,7 @@ pthread_key_create(pthread_key_t *key, void (*destructor)(void *)) int res = EAGAIN; // Returns EAGAIN if key cannot be allocated. 
pthread_key_t k; - _PTHREAD_LOCK(tsd_lock); + _PTHREAD_LOCK(__pthread_tsd_lock); for (k = __pthread_tsd_start; k < __pthread_tsd_end; k++) { if (_pthread_key_set_destructor(k, destructor)) { *key = k; @@ -141,7 +144,7 @@ pthread_key_create(pthread_key_t *key, void (*destructor)(void *)) break; } } - _PTHREAD_UNLOCK(tsd_lock); + _PTHREAD_UNLOCK(__pthread_tsd_lock); return res; } @@ -151,12 +154,12 @@ pthread_key_delete(pthread_key_t key) { int res = EINVAL; // Returns EINVAL if key is not allocated. - _PTHREAD_LOCK(tsd_lock); + _PTHREAD_LOCK(__pthread_tsd_lock); if (key >= __pthread_tsd_start && key < __pthread_tsd_end) { if (_pthread_key_unset_destructor(key)) { struct _pthread *p; _PTHREAD_LOCK(_pthread_list_lock); - TAILQ_FOREACH(p, &__pthread_head, plist) { + TAILQ_FOREACH(p, &__pthread_head, tl_plist) { // No lock for word-sized write. p->tsd[key] = 0; } @@ -164,7 +167,7 @@ pthread_key_delete(pthread_key_t key) res = 0; } } - _PTHREAD_UNLOCK(tsd_lock); + _PTHREAD_UNLOCK(__pthread_tsd_lock); return res; } @@ -188,7 +191,7 @@ pthread_setspecific(pthread_key_t key, const void *value) _pthread_key_set_destructor(key, NULL); } if (key > self->max_tsd_key) { - self->max_tsd_key = (int)key; + self->max_tsd_key = (uint16_t)key; } } } @@ -342,12 +345,12 @@ pthread_key_init_np(int key, void (*destructor)(void *)) { int res = EINVAL; // Returns EINVAL if key is out of range. if (key >= __pthread_tsd_first && key < __pthread_tsd_start) { - _PTHREAD_LOCK(tsd_lock); + _PTHREAD_LOCK(__pthread_tsd_lock); _pthread_key_set_destructor(key, destructor); if (key > __pthread_tsd_max) { __pthread_tsd_max = key; } - _PTHREAD_UNLOCK(tsd_lock); + _PTHREAD_UNLOCK(__pthread_tsd_lock); res = 0; } return res; diff --git a/src/qos.c b/src/qos.c index b31098a..ef36089 100644 --- a/src/qos.c +++ b/src/qos.c @@ -35,8 +35,6 @@ #include "workqueue_private.h" #include "qos_private.h" -static pthread_priority_t _main_qos = QOS_CLASS_UNSPECIFIED; - #define PTHREAD_OVERRIDE_SIGNATURE (0x6f766572) #define PTHREAD_OVERRIDE_SIG_DEAD (0x7265766f) @@ -49,172 +47,145 @@ struct pthread_override_s bool malloced; }; -void -_pthread_set_main_qos(pthread_priority_t qos) +thread_qos_t +_pthread_qos_class_to_thread_qos(qos_class_t qos) { - _main_qos = qos; + switch (qos) { + case QOS_CLASS_USER_INTERACTIVE: return THREAD_QOS_USER_INTERACTIVE; + case QOS_CLASS_USER_INITIATED: return THREAD_QOS_USER_INITIATED; + case QOS_CLASS_DEFAULT: return THREAD_QOS_LEGACY; + case QOS_CLASS_UTILITY: return THREAD_QOS_UTILITY; + case QOS_CLASS_BACKGROUND: return THREAD_QOS_BACKGROUND; + case QOS_CLASS_MAINTENANCE: return THREAD_QOS_MAINTENANCE; + default: return THREAD_QOS_UNSPECIFIED; + } } -int -pthread_attr_set_qos_class_np(pthread_attr_t *__attr, - qos_class_t __qos_class, - int __relative_priority) -{ - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return ENOTSUP; - } +static inline qos_class_t +_pthread_qos_class_from_thread_qos(thread_qos_t tqos) +{ + static const qos_class_t thread_qos_to_qos_class[THREAD_QOS_LAST] = { + [THREAD_QOS_UNSPECIFIED] = QOS_CLASS_UNSPECIFIED, + [THREAD_QOS_MAINTENANCE] = QOS_CLASS_MAINTENANCE, + [THREAD_QOS_BACKGROUND] = QOS_CLASS_BACKGROUND, + [THREAD_QOS_UTILITY] = QOS_CLASS_UTILITY, + [THREAD_QOS_LEGACY] = QOS_CLASS_DEFAULT, + [THREAD_QOS_USER_INITIATED] = QOS_CLASS_USER_INITIATED, + [THREAD_QOS_USER_INTERACTIVE] = QOS_CLASS_USER_INTERACTIVE, + }; + if (os_unlikely(tqos >= THREAD_QOS_LAST)) return QOS_CLASS_UNSPECIFIED; + return thread_qos_to_qos_class[tqos]; +} - if (__relative_priority > 
0 || __relative_priority < QOS_MIN_RELATIVE_PRIORITY) { - return EINVAL; +static inline thread_qos_t +_pthread_validate_qos_class_and_relpri(qos_class_t qc, int relpri) +{ + if (relpri > 0 || relpri < QOS_MIN_RELATIVE_PRIORITY) { + return THREAD_QOS_UNSPECIFIED; } + return _pthread_qos_class_to_thread_qos(qc); +} - int ret = EINVAL; - if (__attr->sig == _PTHREAD_ATTR_SIG) { - if (!__attr->schedset) { - __attr->qosclass = _pthread_priority_make_newest(__qos_class, __relative_priority, 0); - __attr->qosset = 1; - ret = 0; - } - } +static inline void +_pthread_priority_split(pthread_priority_t pp, qos_class_t *qc, int *relpri) +{ + thread_qos_t qos = _pthread_priority_thread_qos(pp); + if (qc) *qc = _pthread_qos_class_from_thread_qos(qos); + if (relpri) *relpri = _pthread_priority_relpri(pp); +} - return ret; +void +_pthread_set_main_qos(pthread_priority_t qos) +{ + _main_qos = (uint32_t)qos; } int -pthread_attr_get_qos_class_np(pthread_attr_t * __restrict __attr, - qos_class_t * __restrict __qos_class, - int * __restrict __relative_priority) +pthread_attr_set_qos_class_np(pthread_attr_t *attr, qos_class_t qc, int relpri) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return ENOTSUP; + thread_qos_t qos = _pthread_validate_qos_class_and_relpri(qc, relpri); + if (attr->sig != _PTHREAD_ATTR_SIG || attr->schedset) { + return EINVAL; } - int ret = EINVAL; - if (__attr->sig == _PTHREAD_ATTR_SIG) { - if (__attr->qosset) { - qos_class_t qos; int relpri; - _pthread_priority_split_newest(__attr->qosclass, qos, relpri); + attr->qosclass = _pthread_priority_make_from_thread_qos(qos, relpri, 0); + attr->qosset = 1; + attr->schedset = 0; + return 0; +} - if (__qos_class) { *__qos_class = qos; } - if (__relative_priority) { *__relative_priority = relpri; } - } else { - if (__qos_class) { *__qos_class = 0; } - if (__relative_priority) { *__relative_priority = 0; } - } - ret = 0; +int +pthread_attr_get_qos_class_np(pthread_attr_t *attr, qos_class_t *qc, int *relpri) +{ + if (attr->sig != _PTHREAD_ATTR_SIG) { + return EINVAL; } - return ret; + _pthread_priority_split(attr->qosset ? attr->qosclass : 0, qc, relpri); + return 0; } int -pthread_set_qos_class_self_np(qos_class_t __qos_class, - int __relative_priority) +pthread_set_qos_class_self_np(qos_class_t qc, int relpri) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return ENOTSUP; - } - - if (__relative_priority > 0 || __relative_priority < QOS_MIN_RELATIVE_PRIORITY) { + thread_qos_t qos = _pthread_validate_qos_class_and_relpri(qc, relpri); + if (!qos) { return EINVAL; } - pthread_priority_t priority = _pthread_priority_make_newest(__qos_class, __relative_priority, 0); - - if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) { - return _pthread_set_properties_self(_PTHREAD_SET_SELF_QOS_FLAG, priority, 0); - } else { - /* We set the thread QoS class in the TSD and then call into the kernel to - * read the value out of it and set the QoS class. 
- */ - _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority); - mach_port_t kport = _pthread_kernel_thread(pthread_self()); - int res = __bsdthread_ctl(BSDTHREAD_CTL_SET_QOS, kport, &pthread_self()->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS], 0); - - if (res == -1) { - res = errno; - } - - return res; - } + pthread_priority_t pp = _pthread_priority_make_from_thread_qos(qos, relpri, 0); + return _pthread_set_properties_self(_PTHREAD_SET_SELF_QOS_FLAG, pp, 0); } int -pthread_set_qos_class_np(pthread_t __pthread, - qos_class_t __qos_class, - int __relative_priority) +pthread_set_qos_class_np(pthread_t thread, qos_class_t qc, int relpri) { - if (__pthread != pthread_self()) { + if (thread != pthread_self()) { /* The kext now enforces this anyway, if we check here too, it allows us to call * _pthread_set_properties_self later if we can. */ return EPERM; } - - return pthread_set_qos_class_self_np(__qos_class, __relative_priority); + return pthread_set_qos_class_self_np(qc, relpri); } int -pthread_get_qos_class_np(pthread_t __pthread, - qos_class_t * __restrict __qos_class, - int * __restrict __relative_priority) +pthread_get_qos_class_np(pthread_t thread, qos_class_t *qc, int *relpri) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return ENOTSUP; - } - - pthread_priority_t priority; - - if (__pthread == pthread_self()) { - priority = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS); - } else { - priority = __pthread->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS]; - } - - qos_class_t qos; int relpri; - _pthread_priority_split_newest(priority, qos, relpri); - - if (__qos_class) { *__qos_class = qos; } - if (__relative_priority) { *__relative_priority = relpri; } - + pthread_priority_t pp = thread->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS]; + _pthread_priority_split(pp, qc, relpri); return 0; } qos_class_t qos_class_self(void) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return QOS_CLASS_UNSPECIFIED; - } - - pthread_priority_t p = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS); - qos_class_t c = _pthread_priority_get_qos_newest(p); - - return c; + pthread_priority_t pp; + pp = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS); + return _pthread_qos_class_from_thread_qos(_pthread_priority_thread_qos(pp)); } qos_class_t qos_class_main(void) { - return _pthread_priority_get_qos_newest(_main_qos); + pthread_priority_t pp = _main_qos; + return _pthread_qos_class_from_thread_qos(_pthread_priority_thread_qos(pp)); } pthread_priority_t -_pthread_qos_class_encode(qos_class_t qos_class, int relative_priority, unsigned long flags) +_pthread_qos_class_encode(qos_class_t qc, int relpri, unsigned long flags) { - return _pthread_priority_make_newest(qos_class, relative_priority, flags); + thread_qos_t qos = _pthread_qos_class_to_thread_qos(qc); + return _pthread_priority_make_from_thread_qos(qos, relpri, flags); } qos_class_t -_pthread_qos_class_decode(pthread_priority_t priority, int *relative_priority, unsigned long *flags) +_pthread_qos_class_decode(pthread_priority_t pp, int *relpri, unsigned long *flags) { - qos_class_t qos; int relpri; - - _pthread_priority_split_newest(priority, qos, relpri); - - if (relative_priority) { *relative_priority = relpri; } - if (flags) { *flags = _pthread_priority_get_flags(priority); } - return qos; + qos_class_t qc; + _pthread_priority_split(pp, &qc, relpri); + if (flags) *flags = (pp & _PTHREAD_PRIORITY_FLAGS_MASK); + return qc; } // Encode a legacy 
workqueue API priority into a pthread_priority_t. This API @@ -222,35 +193,48 @@ _pthread_qos_class_decode(pthread_priority_t priority, int *relative_priority, u pthread_priority_t _pthread_qos_class_encode_workqueue(int queue_priority, unsigned long flags) { + thread_qos_t qos; switch (queue_priority) { - case WORKQ_HIGH_PRIOQUEUE: - return _pthread_priority_make_newest(QOS_CLASS_USER_INITIATED, 0, flags); - case WORKQ_DEFAULT_PRIOQUEUE: - return _pthread_priority_make_newest(QOS_CLASS_DEFAULT, 0, flags); - case WORKQ_LOW_PRIOQUEUE: + case WORKQ_HIGH_PRIOQUEUE: qos = THREAD_QOS_USER_INTERACTIVE; break; + case WORKQ_DEFAULT_PRIOQUEUE: qos = THREAD_QOS_LEGACY; break; case WORKQ_NON_INTERACTIVE_PRIOQUEUE: - return _pthread_priority_make_newest(QOS_CLASS_UTILITY, 0, flags); - case WORKQ_BG_PRIOQUEUE: - return _pthread_priority_make_newest(QOS_CLASS_BACKGROUND, 0, flags); - /* Legacy dispatch does not use QOS_CLASS_MAINTENANCE, so no need to handle it here */ + case WORKQ_LOW_PRIOQUEUE: qos = THREAD_QOS_UTILITY; break; + case WORKQ_BG_PRIOQUEUE: qos = THREAD_QOS_BACKGROUND; break; default: __pthread_abort(); } + return _pthread_priority_make_from_thread_qos(qos, 0, flags); } +#define _PTHREAD_SET_SELF_OUTSIDE_QOS_SKIP \ + (_PTHREAD_SET_SELF_QOS_FLAG | _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG | \ + _PTHREAD_SET_SELF_TIMESHARE_FLAG) + int -_pthread_set_properties_self(_pthread_set_flags_t flags, pthread_priority_t priority, mach_port_t voucher) +_pthread_set_properties_self(_pthread_set_flags_t flags, + pthread_priority_t priority, mach_port_t voucher) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_SETSELF)) { - return ENOTSUP; + pthread_t self = pthread_self(); + _pthread_set_flags_t kflags = flags; + int rv = 0; + + if (self->wqoutsideqos && (flags & _PTHREAD_SET_SELF_OUTSIDE_QOS_SKIP)) { + // A number of properties cannot be altered if we are a workloop + // thread that has outside of QoS properties applied to it. + kflags &= ~_PTHREAD_SET_SELF_OUTSIDE_QOS_SKIP; + if (kflags == 0) goto skip; } - int rv = __bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, priority, voucher, flags); + rv = __bsdthread_ctl(BSDTHREAD_CTL_SET_SELF, priority, voucher, kflags); - /* Set QoS TSD if we succeeded or only failed the voucher half. */ +skip: + // Set QoS TSD if we succeeded, or only failed the voucher portion of the + // call. Additionally, if we skipped setting QoS because of outside-of-QoS + // attributes then we still want to set the TSD in userspace. 
if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) { if (rv == 0 || errno == ENOENT) { - _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority); + _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, + priority); } } @@ -263,37 +247,21 @@ _pthread_set_properties_self(_pthread_set_flags_t flags, pthread_priority_t prio int pthread_set_fixedpriority_self(void) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return ENOTSUP; - } - - if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) { - return _pthread_set_properties_self(_PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG, 0, 0); - } else { - return ENOTSUP; - } + return _pthread_set_properties_self(_PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG, 0, 0); } int pthread_set_timeshare_self(void) { - if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { - return ENOTSUP; - } - - if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) { - return _pthread_set_properties_self(_PTHREAD_SET_SELF_TIMESHARE_FLAG, 0, 0); - } else { - return ENOTSUP; - } + return _pthread_set_properties_self(_PTHREAD_SET_SELF_TIMESHARE_FLAG, 0, 0); } - pthread_override_t -pthread_override_qos_class_start_np(pthread_t __pthread, qos_class_t __qos_class, int __relative_priority) +pthread_override_qos_class_start_np(pthread_t thread, qos_class_t qc, int relpri) { pthread_override_t rv; kern_return_t kr; + thread_qos_t qos; int res = 0; /* For now, we don't have access to malloc. So we'll have to vm_allocate this, which means the tiny struct is going @@ -301,23 +269,30 @@ pthread_override_qos_class_start_np(pthread_t __pthread, qos_class_t __qos_clas */ bool did_malloc = true; + qos = _pthread_validate_qos_class_and_relpri(qc, relpri); + if (qos == THREAD_QOS_UNSPECIFIED) { + return (_Nonnull pthread_override_t)NULL; + } + mach_vm_address_t vm_addr = malloc(sizeof(struct pthread_override_s)); if (!vm_addr) { vm_addr = vm_page_size; did_malloc = false; - kr = mach_vm_allocate(mach_task_self(), &vm_addr, round_page(sizeof(struct pthread_override_s)), VM_MAKE_TAG(VM_MEMORY_LIBDISPATCH) | VM_FLAGS_ANYWHERE); + kr = mach_vm_allocate(mach_task_self(), &vm_addr, + round_page(sizeof(struct pthread_override_s)), + VM_MAKE_TAG(VM_MEMORY_LIBDISPATCH) | VM_FLAGS_ANYWHERE); if (kr != KERN_SUCCESS) { errno = ENOMEM; - return (_Nonnull pthread_override_t) NULL; + return (_Nonnull pthread_override_t)NULL; } } rv = (pthread_override_t)vm_addr; rv->sig = PTHREAD_OVERRIDE_SIGNATURE; - rv->pthread = __pthread; - rv->kthread = pthread_mach_thread_np(__pthread); - rv->priority = _pthread_priority_make_newest(__qos_class, __relative_priority, 0); + rv->pthread = thread; + rv->kthread = pthread_mach_thread_np(thread); + rv->priority = _pthread_priority_make_from_thread_qos(qos, relpri, 0); rv->malloced = did_malloc; /* To ensure that the kernel port that we keep stays valid, we retain it here. 
*/ @@ -342,7 +317,7 @@ pthread_override_qos_class_start_np(pthread_t __pthread, qos_class_t __qos_clas } rv = NULL; } - return (_Nonnull pthread_override_t) rv; + return (_Nonnull pthread_override_t)rv; } int @@ -523,7 +498,11 @@ _pthread_workqueue_parallelism_for_priority(int qos, unsigned long flags) int pthread_qos_max_parallelism(qos_class_t qos, unsigned long flags) { - int thread_qos = _pthread_qos_class_to_thread_qos(qos); + thread_qos_t thread_qos; + if (qos == QOS_CLASS_UNSPECIFIED) { + qos = QOS_CLASS_DEFAULT; // + } + thread_qos = _pthread_qos_class_to_thread_qos(qos); if (thread_qos == THREAD_QOS_UNSPECIFIED) { errno = EINVAL; return -1; diff --git a/src/thread_setup.c b/src/thread_setup.c index 761103e..22cc689 100644 --- a/src/thread_setup.c +++ b/src/thread_setup.c @@ -45,23 +45,24 @@ * MkLinux */ +#include "internal.h" + +#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288 /* * Machine specific support for thread initialization */ -#include "internal.h" -#include +// NOTE: no resolvers, so this file must not contain any atomic operations +PTHREAD_NOEXPORT void _pthread_setup_suspended(pthread_t th, void (*f)(pthread_t), void *sp); /* * Set up the initial state of a MACH thread */ void -_pthread_setup(pthread_t thread, +_pthread_setup_suspended(pthread_t thread, void (*routine)(pthread_t), - void *vsp, - int suspended, - int needresume) + void *vsp) { #if defined(__i386__) i386_thread_state_t state = { }; @@ -71,20 +72,12 @@ _pthread_setup(pthread_t thread, x86_thread_state64_t state = { }; thread_state_flavor_t flavor = x86_THREAD_STATE64; mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT; -#elif defined(__arm__) - arm_thread_state_t state = { }; - thread_state_flavor_t flavor = ARM_THREAD_STATE; - mach_msg_type_number_t count = ARM_THREAD_STATE_COUNT; #else #error _pthread_setup not defined for this architecture #endif - if (suspended) { - (void)thread_get_state(_pthread_kernel_thread(thread), - flavor, - (thread_state_t)&state, - &count); - } + (void)thread_get_state(_pthread_kernel_thread(thread), + flavor, (thread_state_t)&state, &count); #if defined(__i386__) uintptr_t *sp = vsp; @@ -110,46 +103,10 @@ _pthread_setup(pthread_t thread, state.__rdi = (uintptr_t)thread; // argument to function *--sp = 0; // fake return address state.__rsp = (uintptr_t)sp; // set stack pointer -#elif defined(__arm__) - state.__pc = (uintptr_t)routine; - - // Detect switch to thumb mode. - if (state.__pc & 1) { - state.__pc &= ~1; - state.__cpsr |= 0x20; /* PSR_THUMB */ - } - - state.__sp = (uintptr_t)vsp - C_ARGSAVE_LEN - C_RED_ZONE; - state.__r[0] = (uintptr_t)thread; #else -#error _pthread_setup not defined for this architecture +#error _pthread_setup_suspended not defined for this architecture #endif - if (suspended) { - (void)thread_set_state(_pthread_kernel_thread(thread), flavor, (thread_state_t)&state, count); - if (needresume) { - (void)thread_resume(_pthread_kernel_thread(thread)); - } - } else { - mach_port_t kernel_thread; - (void)thread_create_running(mach_task_self(), flavor, (thread_state_t)&state, count, &kernel_thread); - _pthread_set_kernel_thread(thread, kernel_thread); - } -} - -// pthread_setup initializes large structures to 0, which the compiler turns into a library call to memset. 
To avoid linking against -// Libc, provide a simple wrapper that calls through to the libplatform primitives - -#undef memset -__attribute__((visibility("hidden"))) void * -memset(void *b, int c, size_t len) -{ - return _platform_memset(b, c, len); -} - -#undef bzero -__attribute__((visibility("hidden"))) void -bzero(void *s, size_t n) -{ - _platform_bzero(s, n); + (void)thread_set_state(_pthread_kernel_thread(thread), flavor, (thread_state_t)&state, count); } +#endif // !defined(__OPEN_SOURCE__) && TARGET_OS_OSX diff --git a/tests/Makefile b/tests/Makefile index 408b101..84e2717 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -12,10 +12,13 @@ include $(DEVELOPER_DIR)/AppleInternal/Makefiles/darwintest/Makefile.common TARGETS := TARGETS += atfork TARGETS += bsdthread_set_self +TARGETS += stack +TARGETS += stack_size TARGETS += cond #TARGETS += cond_hang3 #TARGETS += cond_stress TARGETS += cond_timed +TARGETS += cond_prepost TARGETS += custom_stack TARGETS += stack_aslr TARGETS += join @@ -24,6 +27,7 @@ TARGETS += main_stack_custom TARGETS += detach #TARGETS += maxwidth TARGETS += mutex +TARGETS += mutex_prepost TARGETS += mutex_try TARGETS += once_cancel TARGETS += pthread_attr_setstacksize @@ -35,6 +39,7 @@ TARGETS += pthread_introspection TARGETS += pthread_setspecific TARGETS += pthread_threadid_np TARGETS += pthread_get_qos_class_np +TARGETS += pthread_dependency #TARGETS += qos TARGETS += rdar_32848402 #TARGETS += rwlock-22244050 @@ -47,6 +52,7 @@ TARGETS += tsd #TARGETS += wq_kevent_stress TARGETS += wq_limits TARGETS += add_timer_termination +TARGETS += perf_contended_mutex_rwlock OTHER_LTE_INCLUDE_FILES += \ /usr/local/lib/libdarwintest_utils.dylib @@ -54,7 +60,7 @@ OTHER_LTE_INCLUDE_FILES += \ OTHER_CFLAGS := -DDARWINTEST -Weverything \ -Wno-vla -Wno-bad-function-cast -Wno-missing-noreturn \ -Wno-missing-field-initializers -Wno-format-pedantic \ - -Wno-gnu-folding-constant + -Wno-gnu-folding-constant -Wno-used-but-marked-unused OTHER_LDFLAGS := -ldarwintest_utils #TARGETS += main_stack_legacy // Disabled by default due to linker warnings @@ -63,8 +69,8 @@ OTHER_LDFLAGS := -ldarwintest_utils #main_stack_legacy: ARCH_FLAGS = -arch i386 #main_stack_legacy: DEPLOYMENT_TARGET_FLAGS = -mmacosx-version-min=10.7 -main_stack_custom: OTHER_LDFLAGS += -Wl,-stack_size,0x14000 -main_stack_custom: OTHER_CFLAGS += -DSTACKSIZE=0x14000 +main_stack_custom: OTHER_LDFLAGS += -Wl,-stack_size,0x124000 +main_stack_custom: OTHER_CFLAGS += -DSTACKSIZE=0x124000 bsdthread_set_self: OTHER_CFLAGS += -D_DARWIN_FEATURE_CLOCK_GETTIME diff --git a/tests/cond_prepost.c b/tests/cond_prepost.c new file mode 100644 index 0000000..df8b86e --- /dev/null +++ b/tests/cond_prepost.c @@ -0,0 +1,217 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "darwintest_defaults.h" +#include + +// this test case is intended to test for the +// specific issue found in this radar. That is, if: +// +// 1. A mutex is in first-fit policy mode, and +// 2. is used as the mutex in a pthread_cond_wait (or timedwait), and +// 3. the mutex has the K-bit set but has no kernel waiters, and +// 4. the cvwait call preposts an unlock to the mutex +// +// Under these conditions, the fact that the cvwait preposted an unlock to +// the paired mutex is lost during the call. The P-bit was never returned to +// userspace and the kwq in the kernel would continue to exist. 
If the same +// uaddr is then reused as another synchroniser type then we would often +// return EINVAL from the wait/lock function. +// +// So this test is attempting to: +// +// 1. Repeatedly bang on a mutex+cvar for a number of iterations in the +// hope of triggering a cvwait prepost situation. +// 2. Then destroy both the mutex and cvar, and reinitialise each memory +// location as the opposite type of synchroniser. Then cvwait once to +// trigger the failure condition. + +struct context { + union { + pthread_mutex_t mutex; + pthread_cond_t cond; + }; + union { + pthread_mutex_t mutex2; + pthread_cond_t cond2; + }; + long value; + long count; + long waiter; +}; + +static void *test_cond(void *ptr) { + struct context *context = ptr; + int res; + + res = pthread_cond_wait(&context->cond, &context->mutex2); + T_ASSERT_POSIX_ZERO(res, "condition wait on condvar completed"); + res = pthread_mutex_unlock(&context->mutex2); + T_ASSERT_POSIX_ZERO(res, "unlock condvar mutex"); + return NULL; +} + +static void *test_cond_wake(void *ptr) { + struct context *context = ptr; + int res; + + res = pthread_mutex_lock(&context->mutex2); + T_ASSERT_POSIX_ZERO(res, "locked condvar mutex"); + res = pthread_cond_signal(&context->cond); + T_ASSERT_POSIX_ZERO(res, "condvar signalled"); + res = pthread_mutex_unlock(&context->mutex2); + T_ASSERT_POSIX_ZERO(res, "dropped condvar mutex"); + + return NULL; +} + +static void *test_thread(void *ptr) { + int res; + long old; + struct context *context = ptr; + + int i = 0; + char *str; + + do { + bool try = i++ & 1; + bool cond = i & 16; + + if (!try){ + str = "pthread_mutex_lock"; + res = pthread_mutex_lock(&context->mutex); + } else { + str = "pthread_mutex_trylock"; + res = pthread_mutex_trylock(&context->mutex); + } + if (res != 0) { + if (try && res == EBUSY) { + continue; + } + T_ASSERT_POSIX_ZERO(res, "[%ld] %s", context->count, str); + } + + old = __sync_fetch_and_or(&context->value, 1); + if ((old & 1) != 0) { + T_FAIL("[%ld] OR %lx\n", context->count, old); + } + + old = __sync_fetch_and_and(&context->value, 0); + if ((old & 1) == 0) { + T_FAIL("[%ld] AND %lx\n", context->count, old); + } + + if (cond && !context->waiter) { + context->waiter = 1; + struct timespec ts = { + .tv_sec = 0, + .tv_nsec = 10ull * NSEC_PER_MSEC, + }; + + res = pthread_cond_timedwait_relative_np(&context->cond2, &context->mutex, &ts); + if (res == ETIMEDOUT) { + // ignore, should be the last thread out + } else if (res) { + T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_cond_wait", + context->count); + } + context->waiter = 0; + res = pthread_mutex_unlock(&context->mutex); + if (res) { + T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_mutex_unlock", + context->count); + } + } else { + if (context->waiter) { + res = pthread_cond_broadcast(&context->cond2); + if (res) { + T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_cond_broadcast", + context->count); + } + } + res = pthread_mutex_unlock(&context->mutex); + if (res) { + T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_mutex_unlock", + context->count); + } + } + } while (__sync_fetch_and_sub(&context->count, 1) > 0); + return NULL; +} + + +static void +_test_condvar_prepost_race(void) +{ + struct context context = { + .mutex = PTHREAD_MUTEX_INITIALIZER, + .cond2 = PTHREAD_COND_INITIALIZER, + .value = 0, + .count = 10000, + .waiter = false, + }; + int i; + int res; + int threads = 8; + pthread_t p[threads]; + for (i = 0; i < threads; ++i) { + res = pthread_create(&p[i], NULL, test_thread, &context); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()"); + } + for 
(i = 0; i < threads; ++i) { + res = pthread_join(p[i], NULL); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()"); + } + + T_PASS("initial pthread mutex storm completed"); + + pthread_mutex_destroy(&context.mutex); + pthread_cond_destroy(&context.cond2); + + pthread_mutex_init(&context.mutex2, NULL); + pthread_cond_init(&context.cond, NULL); + res = pthread_mutex_lock(&context.mutex2); + T_ASSERT_POSIX_ZERO(res, "mutex lock for condition wait"); + res = pthread_create(&p[0], NULL, test_cond, &context); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()"); + res = pthread_create(&p[1], NULL, test_cond_wake, &context); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()"); + + res = pthread_join(p[0], NULL); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()"); + res = pthread_join(p[1], NULL); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()"); + + pthread_cond_destroy(&context.cond); +} + +T_DECL(cond_prepost_fairshare, "cond_prepost_fairshare (fairshare)", + T_META_ALL_VALID_ARCHS(YES), + T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=1")) +{ + int i; + int count = 100; + for (i=0; i < count; i++) { + _test_condvar_prepost_race(); + } +} + +T_DECL(cond_prepost_firstfit, "cond_prepost_firstfit (firstfit)", + T_META_ALL_VALID_ARCHS(YES), + T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=3")) +{ + int i; + int count = 100; + for (i=0; i < count; i++) { + _test_condvar_prepost_race(); + } +} diff --git a/tests/main_stack_custom.c b/tests/main_stack_custom.c index eb0d660..2e992a8 100644 --- a/tests/main_stack_custom.c +++ b/tests/main_stack_custom.c @@ -14,7 +14,7 @@ T_DECL(main_stack_custom, "tests the reported values for a custom main thread st struct rlimit lim; T_QUIET; T_ASSERT_POSIX_SUCCESS(getrlimit(RLIMIT_STACK, &lim), NULL); - lim.rlim_cur = lim.rlim_cur / 8; + lim.rlim_cur = lim.rlim_cur + 32 * PAGE_SIZE; T_EXPECT_EQ(setrlimit(RLIMIT_STACK, &lim), -1, "setrlimit for stack should fail with custom stack"); T_EXPECT_EQ((size_t)STACKSIZE, pthread_get_stacksize_np(pthread_self()), "reported stacksize shouldn't change"); } diff --git a/tests/mutex.c b/tests/mutex.c index 0b1e1d4..9fe0277 100644 --- a/tests/mutex.c +++ b/tests/mutex.c @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -104,7 +105,7 @@ check_process_default_mutex_policy(int expected_policy) T_DECL(mutex_default_policy, "Tests that the default mutex policy is fairshare") { - check_process_default_mutex_policy(_PTHREAD_MUTEX_POLICY_FAIRSHARE); + check_process_default_mutex_policy(_PTHREAD_MUTEX_POLICY_FIRSTFIT); } T_DECL(mutex_default_policy_sysctl, @@ -133,7 +134,7 @@ T_HELPER_DECL(mutex_default_policy_sysctl_helper, "sysctl helper") T_DECL(mutex_default_policy_envvar, "Tests that setting the policy environment variable changes the default policy", - T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=2")) + T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=3")) { check_process_default_mutex_policy(_PTHREAD_MUTEX_POLICY_FIRSTFIT); } diff --git a/tests/mutex_prepost.c b/tests/mutex_prepost.c new file mode 100644 index 0000000..6423e20 --- /dev/null +++ b/tests/mutex_prepost.c @@ -0,0 +1,157 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "darwintest_defaults.h" +#include + +struct context { + union { + pthread_mutex_t mutex; + pthread_cond_t cond; + }; + pthread_mutex_t mutex2; + long value; + long count; +}; + +static void *test_cond(void *ptr) { + struct context *context = ptr; + int res; + + res = pthread_cond_wait(&context->cond, &context->mutex2); + 
T_ASSERT_POSIX_ZERO(res, "condition wait on condvar completed"); + res = pthread_mutex_unlock(&context->mutex2); + T_ASSERT_POSIX_ZERO(res, "unlock condvar mutex"); + return NULL; +} + +static void *test_cond_wake(void *ptr) { + struct context *context = ptr; + int res; + + res = pthread_mutex_lock(&context->mutex2); + T_ASSERT_POSIX_ZERO(res, "locked condvar mutex"); + res = pthread_cond_signal(&context->cond); + T_ASSERT_POSIX_ZERO(res, "condvar signalled"); + res = pthread_mutex_unlock(&context->mutex2); + T_ASSERT_POSIX_ZERO(res, "dropped condvar mutex"); + + return NULL; +} + +static void *test_thread(void *ptr) { + int res; + long old; + struct context *context = ptr; + + int i = 0; + char *str; + + do { + bool try = i++ & 1; + + if (!try){ + str = "pthread_mutex_lock"; + res = pthread_mutex_lock(&context->mutex); + } else { + str = "pthread_mutex_trylock"; + res = pthread_mutex_trylock(&context->mutex); + } + if (res != 0) { + if (try && res == EBUSY) { + continue; + } + T_ASSERT_POSIX_ZERO(res, "[%ld] %s", context->count, str); + } + + old = __sync_fetch_and_or(&context->value, 1); + if ((old & 1) != 0) { + T_FAIL("[%ld] OR %lx\n", context->count, old); + } + + old = __sync_fetch_and_and(&context->value, 0); + if ((old & 1) == 0) { + T_FAIL("[%ld] AND %lx\n", context->count, old); + } + + res = pthread_mutex_unlock(&context->mutex); + if (res) { + T_ASSERT_POSIX_ZERO(res, "[%ld] pthread_mutex_lock", context->count); + } + } while (__sync_fetch_and_sub(&context->count, 1) > 0); + return NULL; +} + + +static void +_test_condvar_prepost_race(void) +{ + struct context context = { + .mutex = PTHREAD_MUTEX_INITIALIZER, + .mutex2 = PTHREAD_MUTEX_INITIALIZER, + .value = 0, + .count = 1000, + }; + int i; + int res; + int threads = 8; + pthread_t p[threads]; + for (i = 0; i < threads; ++i) { + res = pthread_create(&p[i], NULL, test_thread, &context); + T_ASSERT_POSIX_ZERO(res, "pthread_create()"); + } + for (i = 0; i < threads; ++i) { + res = pthread_join(p[i], NULL); + T_ASSERT_POSIX_ZERO(res, "pthread_join()"); + } + + T_PASS("initial pthread mutex storm completed"); + + pthread_mutex_destroy(&context.mutex); + + pthread_cond_init(&context.cond, NULL); + res = pthread_mutex_lock(&context.mutex2); + T_ASSERT_POSIX_ZERO(res, "mutex lock for condition wait"); + res = pthread_create(&p[0], NULL, test_cond, &context); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()"); + res = pthread_create(&p[1], NULL, test_cond_wake, &context); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_create()"); + + res = pthread_join(p[0], NULL); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()"); + res = pthread_join(p[1], NULL); + T_QUIET; T_ASSERT_POSIX_ZERO(res, "pthread_join()"); + + pthread_cond_destroy(&context.cond); +} + +T_DECL(mutex_prepost_fairshare, "pthread_mutex_prepost (fairshare)", + T_META_ALL_VALID_ARCHS(YES), + T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=1")) +{ + int i; + int count = 100; + for (i=0; i < count; i++) { + _test_condvar_prepost_race(); + } +} + +T_DECL(mutex_prepost_firstfit, "pthread_mutex_prepost (firstfit)", + T_META_ALL_VALID_ARCHS(YES), + T_META_ENVVAR("PTHREAD_MUTEX_DEFAULT_POLICY=3")) +{ + int i; + int count = 100; + for (i=0; i < count; i++) { + _test_condvar_prepost_race(); + } +} diff --git a/tests/perf_contended_mutex_rwlock.c b/tests/perf_contended_mutex_rwlock.c new file mode 100644 index 0000000..e4219c5 --- /dev/null +++ b/tests/perf_contended_mutex_rwlock.c @@ -0,0 +1,519 @@ +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include + +// number of times the lock is taken per dt_stat batch +#define ITERATIONS_PER_DT_STAT_BATCH 10000ull +// number of times the contended mutex is taken per dt_stat batch +#define ITERATIONS_PER_DT_STAT_BATCH_CONTENDED_MUTEX 1000ull +// shift determining power of 2 factor of time spent by worker threads in the +// busy() function while outside of the lock vs inside the lock +#define OUTER_VS_INNER_SHIFT 4 +// fraction of read lock vs write lock acquires +#define RDLOCK_FRACTION 0.99f +// maintain and print progress counters in between measurement batches +#define COUNTERS 0 + +// move the darwintest assertion code out of the straight line execution path +// since it is has non-trivial overhead and codegen impact even if the assertion +// is never triggered. +#define iferr(_e) if(__builtin_expect(!!(_e), 0)) + +#pragma mark - + +uint64_t +random_busy_counts(unsigned int *seed, uint64_t *inner, uint64_t *outer) +{ + uint64_t random = rand_r(seed); + const uint64_t of = (1 << OUTER_VS_INNER_SHIFT); + *inner = 0x4 + (random & (0x10 - 1)); + *outer = 0x4 * of + ((random >> 4) & (0x10 * of - 1)); + return random; +} + +// By default busy() does cpu busy work for a passed in number of iterations +enum { + busy_is_nothing = 0, + busy_is_cpu_busy, + busy_is_cpu_yield, +}; +static int busy_select = busy_is_cpu_busy; + +static double +cpu_busy(uint64_t n) +{ + double d = M_PI; + uint64_t i; + for (i = 0; i < n; i++) d *= M_PI; + return d; +} + +static double +cpu_yield(uint64_t n) +{ + uint64_t i; + for (i = 0; i < n; i++) { +#if defined(__arm__) || defined(__arm64__) + asm volatile("yield"); +#elif defined(__x86_64__) || defined(__i386__) + asm volatile("pause"); +#else +#error Unrecognized architecture +#endif + } + return 0; +} + +__attribute__((noinline)) +static double +busy(uint64_t n) +{ + switch(busy_select) { + case busy_is_cpu_busy: + return cpu_busy(n); + case busy_is_cpu_yield: + return cpu_yield(n); + default: + return 0; + } +} + +#pragma mark - + +static semaphore_t ready_sem, start_sem, end_sem; +static uint32_t nthreads; +static _Atomic uint32_t active_thr; +static _Atomic int64_t todo; +uint64_t iterations_per_dt_stat_batch = ITERATIONS_PER_DT_STAT_BATCH; + +#if COUNTERS +static _Atomic uint64_t total_locks, total_rdlocks, total_wrlocks; +#define ctr_inc(_t) atomic_fetch_add_explicit(&(_t), 1, memory_order_relaxed) +#else +#define ctr_inc(_t) +#endif + +static uint32_t +ncpu(void) +{ + static uint32_t activecpu, physicalcpu; + if (!activecpu) { + uint32_t n; + size_t s = sizeof(n); + sysctlbyname("hw.activecpu", &n, &s, NULL, 0); + activecpu = n; + s = sizeof(n); + sysctlbyname("hw.physicalcpu", &n, &s, NULL, 0); + physicalcpu = n; + } + return MIN(activecpu, physicalcpu); +} + +__attribute__((noinline)) +static void +threaded_bench(dt_stat_time_t s, int batch_size) +{ + kern_return_t kr; + for (int i = 0; i < nthreads; i++) { + kr = semaphore_wait(ready_sem); + iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");} + } + atomic_init(&active_thr, nthreads); + atomic_init(&todo, batch_size * iterations_per_dt_stat_batch); + dt_stat_token t = dt_stat_begin(s); + kr = semaphore_signal_all(start_sem); + iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal_all");} + kr = semaphore_wait(end_sem); + iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");} + dt_stat_end_batch(s, batch_size, t); +} + +static void +setup_threaded_bench(void* (*thread_fn)(void*), bool singlethreaded) +{ + kern_return_t kr; + int r; + char *e; + + 
if (singlethreaded) { + nthreads = 1; + } else { + if ((e = getenv("DT_STAT_NTHREADS"))) nthreads = strtoul(e, NULL, 0); + if (nthreads < 2) nthreads = ncpu(); + } + if ((e = getenv("DT_STAT_CPU_BUSY"))) busy_select = strtoul(e, NULL, 0); + + kr = semaphore_create(mach_task_self(), &ready_sem, SYNC_POLICY_FIFO, 0); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create"); + kr = semaphore_create(mach_task_self(), &start_sem, SYNC_POLICY_FIFO, 0); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create"); + kr = semaphore_create(mach_task_self(), &end_sem, SYNC_POLICY_FIFO, 0); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create"); + + pthread_attr_t attr; + r = pthread_attr_init(&attr); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_attr_init"); + r = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_attr_setdetachstate"); + + for (int i = 0; i < nthreads; i++) { + pthread_t th; + r = pthread_create(&th, &attr, thread_fn, (void *)(uintptr_t)(i+1)); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_create"); + } +} + +#pragma mark - + +static pthread_mutex_t mutex; + +static void * +mutex_bench_thread(void * arg) +{ + kern_return_t kr; + int r; + unsigned int seed; + volatile double dummy; + +restart: + seed = (uintptr_t)arg; // each thread repeats its own sequence + kr = semaphore_wait_signal(start_sem, ready_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal"); + + while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) { + uint64_t inner, outer; + random_busy_counts(&seed, &inner, &outer); + dummy = busy(outer); + r = pthread_mutex_lock(&mutex); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_lock");} + dummy = busy(inner); + r = pthread_mutex_unlock(&mutex); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_unlock");} + ctr_inc(total_locks); + } + + if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) { + kr = semaphore_signal(end_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal"); + } + goto restart; +} + +static void +mutex_bench(bool singlethreaded) +{ + int r; + int batch_size; +#if COUNTERS + uint64_t batch = 0; +#endif + + setup_threaded_bench(mutex_bench_thread, singlethreaded); + + pthread_mutexattr_t attr; + r = pthread_mutexattr_init(&attr); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_init"); + pthread_mutexattr_setpolicy_np(&attr, _PTHREAD_MUTEX_POLICY_FAIRSHARE); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_setpolicy_np"); + r = pthread_mutex_init(&mutex, &attr); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_init"); + + dt_stat_time_t s = dt_stat_time_create("%llu pthread_mutex_lock & " + "pthread_mutex_unlock (fairshare) on %u thread%s", + iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? 
"s" : ""); + do { + batch_size = dt_stat_batch_size(s); + threaded_bench(s, batch_size); +#if COUNTERS + fprintf(stderr, "\rbatch: %4llu\t size: %4d\tmutexes: %8llu", + ++batch, batch_size, + atomic_load_explicit(&total_locks, memory_order_relaxed)); +#endif + } while (!dt_stat_stable(s)); +#if COUNTERS + fprintf(stderr, "\n"); +#endif + dt_stat_finalize(s); +} + +T_DECL(perf_uncontended_mutex_bench, "Uncontended fairshare mutex", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + mutex_bench(true); +} + +T_DECL(perf_contended_mutex_bench, "Contended fairshare mutex", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + iterations_per_dt_stat_batch = ITERATIONS_PER_DT_STAT_BATCH_CONTENDED_MUTEX; + mutex_bench(false); +} + +#pragma mark - + +static pthread_rwlock_t rwlock; + +static void * +rwlock_bench_thread(void * arg) +{ + kern_return_t kr; + int r; + unsigned int seed; + volatile double dummy; + const uint64_t rand_rdlock_max = (double)RAND_MAX * RDLOCK_FRACTION; + +restart: + seed = (uintptr_t)arg; // each thread repeats its own sequence + kr = semaphore_wait_signal(start_sem, ready_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal"); + + while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) { + uint64_t inner, outer; + uint64_t random = random_busy_counts(&seed, &inner, &outer); + dummy = busy(outer); + if (random < rand_rdlock_max) { + r = pthread_rwlock_rdlock(&rwlock); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_rdlock");} + dummy = busy(inner); + r = pthread_rwlock_unlock(&rwlock); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_unlock");} + ctr_inc(total_rdlocks); + } else { + r = pthread_rwlock_wrlock(&rwlock); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_wrlock");} + dummy = busy(inner); + r = pthread_rwlock_unlock(&rwlock); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_unlock");} + ctr_inc(total_wrlocks); + } + ctr_inc(total_locks); + } + + if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) { + kr = semaphore_signal(end_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal"); + } + goto restart; +} + +static void +rwlock_bench(bool singlethreaded) +{ + int r; + int batch_size; +#if COUNTERS + uint64_t batch = 0; +#endif + + setup_threaded_bench(rwlock_bench_thread, singlethreaded); + + r = pthread_rwlock_init(&rwlock, NULL); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_init"); + + dt_stat_time_t s = dt_stat_time_create("%llu pthread_rwlock_rd/wrlock & " + "pthread_rwlock_unlock (%.0f%% rdlock) on %u thread%s", + iterations_per_dt_stat_batch, RDLOCK_FRACTION * 100, nthreads, + nthreads > 1 ? 
"s" : ""); + do { + batch_size = dt_stat_batch_size(s); + threaded_bench(s, batch_size); +#if COUNTERS + fprintf(stderr, "\rbatch: %4llu\t size: %4d\trwlocks: %8llu\t" + "rd: %8llu\twr: %8llu", ++batch, batch_size, + atomic_load_explicit(&total_locks, memory_order_relaxed), + atomic_load_explicit(&total_rdlocks, memory_order_relaxed), + atomic_load_explicit(&total_wrlocks, memory_order_relaxed)); +#endif + } while (!dt_stat_stable(s)); +#if COUNTERS + fprintf(stderr, "\n"); +#endif + dt_stat_finalize(s); +} + +T_DECL(perf_uncontended_rwlock_bench, "Uncontended rwlock", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + rwlock_bench(true); +} + +T_DECL(perf_contended_rwlock_bench, "Contended rwlock", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + rwlock_bench(false); +} + +#pragma mark - + +static os_unfair_lock unfair_lock; + +static void * +unfair_lock_bench_thread(void * arg) +{ + kern_return_t kr; + unsigned int seed; + volatile double dummy; + +restart: + seed = (uintptr_t)arg; // each thread repeats its own sequence + kr = semaphore_wait_signal(start_sem, ready_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal"); + + while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) { + uint64_t inner, outer; + random_busy_counts(&seed, &inner, &outer); + dummy = busy(outer); + os_unfair_lock_lock(&unfair_lock); + dummy = busy(inner); + os_unfair_lock_unlock(&unfair_lock); + ctr_inc(total_locks); + } + + if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) { + kr = semaphore_signal(end_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal"); + } + goto restart; +} + +static void +unfair_lock_bench(bool singlethreaded) +{ + int r; + int batch_size; +#if COUNTERS + uint64_t batch = 0; +#endif + + setup_threaded_bench(unfair_lock_bench_thread, singlethreaded); + + dt_stat_time_t s = dt_stat_time_create("%llu os_unfair_lock_lock & " + "os_unfair_lock_unlock on %u thread%s", + iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? 
"s" : ""); + do { + batch_size = dt_stat_batch_size(s); + threaded_bench(s, batch_size); +#if COUNTERS + fprintf(stderr, "\rbatch: %4llu\t size: %4d\tunfair_locks: %8llu", + ++batch, batch_size, + atomic_load_explicit(&total_locks, memory_order_relaxed)); +#endif + } while (!dt_stat_stable(s)); +#if COUNTERS + fprintf(stderr, "\n"); +#endif + dt_stat_finalize(s); +} + +T_DECL(perf_uncontended_unfair_lock_bench, "Unontended unfair lock", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + unfair_lock_bench(true); +} + +T_DECL(perf_contended_unfair_lock_bench, "Contended unfair lock", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + unfair_lock_bench(false); +} + +#pragma mark - + +static pthread_mutex_t ffmutex; + +static void * +ffmutex_bench_thread(void * arg) +{ + kern_return_t kr; + int r; + unsigned int seed; + volatile double dummy; + +restart: + seed = (uintptr_t)arg; // each thread repeats its own sequence + kr = semaphore_wait_signal(start_sem, ready_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal"); + + while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) { + uint64_t inner, outer; + random_busy_counts(&seed, &inner, &outer); + dummy = busy(outer); + r = pthread_mutex_lock(&ffmutex); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_lock");} + dummy = busy(inner); + r = pthread_mutex_unlock(&ffmutex); + iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_unlock");} + ctr_inc(total_locks); + } + + if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) { + kr = semaphore_signal(end_sem); + T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal"); + } + goto restart; +} + +static void +ffmutex_bench(bool singlethreaded) +{ + int r; + int batch_size; +#if COUNTERS + uint64_t batch = 0; +#endif + + setup_threaded_bench(ffmutex_bench_thread, singlethreaded); + + pthread_mutexattr_t attr; + r = pthread_mutexattr_init(&attr); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_init"); + pthread_mutexattr_setpolicy_np(&attr, _PTHREAD_MUTEX_POLICY_FIRSTFIT); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_setpolicy_np"); + r = pthread_mutex_init(&ffmutex, &attr); + T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_init"); + + dt_stat_time_t s = dt_stat_time_create("%llu pthread_mutex_lock & " + "pthread_mutex_unlock (first-fit) on %u thread%s", + iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? 
"s" : ""); + do { + batch_size = dt_stat_batch_size(s); + threaded_bench(s, batch_size); +#if COUNTERS + fprintf(stderr, "\rbatch: %4llu\t size: %4d\tffmutexes: %8llu", + ++batch, batch_size, + atomic_load_explicit(&total_locks, memory_order_relaxed)); +#endif + } while (!dt_stat_stable(s)); +#if COUNTERS + fprintf(stderr, "\n"); +#endif + dt_stat_finalize(s); +} + +T_DECL(perf_uncontended_ffmutex_bench, "Uncontended first-fit mutex", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + ffmutex_bench(true); +} + +T_DECL(perf_contended_ffmutex_bench, "Contended first-fit mutex", + T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO), + T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false)) +{ + ffmutex_bench(false); +} diff --git a/tests/pthread_dependency.c b/tests/pthread_dependency.c new file mode 100644 index 0000000..a6fd316 --- /dev/null +++ b/tests/pthread_dependency.c @@ -0,0 +1,78 @@ +#include "darwintest_defaults.h" +#include +#include + +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; + +static struct job { + pthread_dependency_t *req; + useconds_t usleep; + int done; +} job; + +static void * +do_test(void *__unused arg) +{ + pthread_mutex_lock(&mutex); + + while (!job.done) { + while (job.req == 0) { + pthread_cond_wait(&cond, &mutex); + } + if (job.usleep) usleep(job.usleep); + pthread_dependency_fulfill_np(job.req, job.req); + job.req = NULL; + } + + pthread_mutex_unlock(&mutex); + return NULL; +} + +static void +post_req(pthread_dependency_t *req, useconds_t delay, bool done) +{ + pthread_mutex_lock(&mutex); + job.req = req; + job.usleep = delay; + job.done = done; + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); +} + +T_DECL(dependency, "dependency", T_META_ALL_VALID_ARCHS(YES)) +{ + pthread_dependency_t req; + pthread_t pth; + void *v; + int ret; + + T_ASSERT_POSIX_ZERO(pthread_create(&pth, NULL, do_test, NULL), NULL); + + T_LOG("Waiting on a pdependency that takes some time"); + + pthread_dependency_init_np(&req, pth, NULL); + post_req(&req, 100000, false); + v = pthread_dependency_wait_np(&req); + T_EXPECT_EQ(v, &req, "pthread_dependency_wait worked"); + + T_LOG("Waiting on a pdependency that is already fulfilled"); + + pthread_dependency_init_np(&req, pth, NULL); + post_req(&req, 0, false); + usleep(100000); + v = pthread_dependency_wait_np(&req); + T_EXPECT_EQ(v, &req, "pthread_dependency_wait worked"); + + T_LOG("Waiting on a fulfilled pdependency with the other thread exiting"); + + pthread_dependency_init_np(&req, pth, NULL); + post_req(&req, 0, true); + ret = pthread_join(pth, NULL); + T_EXPECT_POSIX_ZERO(ret, "pthread_join"); + + v = pthread_dependency_wait_np(&req); + T_EXPECT_EQ(v, &req, "pthread_dependency_wait worked"); + + T_END; +} diff --git a/tests/pthread_threadid_np.c b/tests/pthread_threadid_np.c index d28ca65..19cfc25 100644 --- a/tests/pthread_threadid_np.c +++ b/tests/pthread_threadid_np.c @@ -9,7 +9,7 @@ extern __uint64_t __thread_selfid( void ); static void *do_test(void * __unused arg) { uint64_t threadid = __thread_selfid(); - T_ASSERT_NOTNULL(threadid, NULL); + T_ASSERT_NE(threadid, (uint64_t)0, "__thread_selfid()"); uint64_t pth_threadid = 0; T_ASSERT_POSIX_ZERO(pthread_threadid_np(NULL, &pth_threadid), NULL); diff --git a/tests/rdar_32848402.c b/tests/rdar_32848402.c index 65cd56e..068836a 100644 --- a/tests/rdar_32848402.c +++ b/tests/rdar_32848402.c @@ -72,7 +72,9 @@ T_DECL(thread_request_32848402, "repro for 
rdar://32848402") end_spin = clock_gettime_nsec_np(CLOCK_MONOTONIC) + 2 * NSEC_PER_SEC; dispatch_async_f(a, (void *)0, spin_and_pause); - for (long i = 1; i < get_ncpu(); i++) { + long n_threads = MIN((long)get_ncpu(), + pthread_qos_max_parallelism(QOS_CLASS_BACKGROUND, 0)); + for (long i = 1; i < n_threads; i++) { dispatch_async_f(b, (void *)i, spin); } diff --git a/tests/stack.c b/tests/stack.c new file mode 100644 index 0000000..f910b28 --- /dev/null +++ b/tests/stack.c @@ -0,0 +1,82 @@ +#include +#include + +#include "darwintest_defaults.h" +#include + +#if defined(__arm64__) +#define call_chkstk(value) \ + __asm__ volatile("orr x9, xzr, %0\t\n" \ + "bl _thread_chkstk_darwin" : : "i"(value) : "x9") +#define TRAPSIG SIGTRAP +#elif defined(__x86_64__) +#define call_chkstk(value) \ + __asm__ volatile("movq %0, %%rax\t\n" \ + "callq _thread_chkstk_darwin" : : "i"(value) : "rax") +#define TRAPSIG SIGILL +#elif defined(__i386__) +#define call_chkstk(value) \ + __asm__ volatile("movl %0, %%eax\t\n" \ + "calll _thread_chkstk_darwin" : : "i"(value) : "eax") +#define TRAPSIG SIGILL +#endif + +static void +got_signal(int signo __unused) +{ + T_PASS("calling with 1 << 24 crashed"); + T_END; +} + +T_DECL(chkstk, "chkstk", + T_META_ALL_VALID_ARCHS(YES), T_META_CHECK_LEAKS(NO)) +{ +#if defined(__arm__) + T_SKIP("not on armv7"); +#else + + call_chkstk(1 << 8); + T_PASS("calling with 1 << 8"); + + call_chkstk(1 << 16); + T_PASS("calling with 1 << 16"); + + signal(TRAPSIG, got_signal); + + call_chkstk(1 << 24); + T_FAIL("should have crashed"); +#endif +} + +struct frame { + uintptr_t frame; + uintptr_t ret; +}; + +OS_NOINLINE OS_NOT_TAIL_CALLED +static void +do_stack_frame_decode_test(struct frame frames[], size_t n, size_t count) +{ + if (n < count) { + frames[n].frame = (uintptr_t)__builtin_frame_address(1); + frames[n].ret = (uintptr_t)__builtin_return_address(0); + do_stack_frame_decode_test(frames, n + 1, count); + } else { + uintptr_t frame = (uintptr_t)__builtin_frame_address(1); + uintptr_t ret; + while (count-- > 0) { + frame = pthread_stack_frame_decode_np(frame, &ret); + T_EXPECT_EQ(frames[count].frame, frame, "Frame %zd", count); + T_EXPECT_EQ(frames[count].ret, ret, "Retaddr %zd", count); + } + } +} + +T_DECL(pthread_stack_frame_decode_np, "pthread_stack_frame_decode_np", + T_META_ALL_VALID_ARCHS(YES), T_META_CHECK_LEAKS(NO)) +{ + struct frame frames[10]; + frames[0].frame = (uintptr_t)__builtin_frame_address(1); + frames[0].ret = (uintptr_t)__builtin_return_address(0); + do_stack_frame_decode_test(frames, 1, 10); +} diff --git a/tests/stack_aslr.c b/tests/stack_aslr.c index a8dab42..aaf483e 100644 --- a/tests/stack_aslr.c +++ b/tests/stack_aslr.c @@ -133,7 +133,7 @@ again: for (int i = 0; i < attempts; i++) { char *t; - asprintf(&t, "%s/%zd", tmp, i); + asprintf(&t, "%s/%d", tmp, i); T_QUIET; T_ASSERT_POSIX_SUCCESS(mkdir(t, 0700), "mkdir"); setenv("BATS_TMP_DIR", t, 1); // hack to workaround rdar://33443485 free(t); @@ -144,7 +144,7 @@ again: T_QUIET; T_FAIL("Helper should complete in <.1s"); goto timeout; } - usleep(1000); + usleep(1000 * 100); } while (shmem->done <= i); } setenv("BATS_TMP_DIR", tmpdir, 1); diff --git a/tests/stack_size.c b/tests/stack_size.c new file mode 100644 index 0000000..3a52747 --- /dev/null +++ b/tests/stack_size.c @@ -0,0 +1,81 @@ +#include +#include +#include +#include "darwintest_defaults.h" + +#define PTHREAD_T_OFFSET (0) + +static void * +function(void *arg) +{ + size_t expected_size = (size_t)(uintptr_t)arg; + 
T_ASSERT_EQ(pthread_get_stacksize_np(pthread_self()), expected_size, + "saw expected pthread_get_stacksize_np"); + return NULL; +} + +T_DECL(stack_size_default, "stack size of default pthread", + T_META_ALL_VALID_ARCHS(YES)) +{ + static const size_t dflsize = 512 * 1024; + pthread_t thread; + pthread_attr_t attr; + + T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL); + T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function, + (void *)(dflsize + PTHREAD_T_OFFSET)), NULL); + T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL); +} + +T_DECL(stack_size_customsize, "stack size of thread with custom stack size", + T_META_ALL_VALID_ARCHS(YES)) +{ + static const size_t stksize = 768 * 1024; + pthread_t thread; + pthread_attr_t attr; + + T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL); + T_ASSERT_POSIX_ZERO(pthread_attr_setstacksize(&attr, stksize), NULL); + T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function, + (void *)(stksize + PTHREAD_T_OFFSET)), NULL); + T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL); +} + +T_DECL(stack_size_customaddr, "stack size of thread with custom stack addr", + T_META_ALL_VALID_ARCHS(YES)) +{ + static const size_t stksize = 512 * 1024; + pthread_t thread; + pthread_attr_t attr; + + uintptr_t stackaddr = (uintptr_t)valloc(stksize); + stackaddr += stksize; // address is top of stack + + T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL); + T_ASSERT_POSIX_ZERO(pthread_attr_setstackaddr(&attr, (void *)stackaddr), + NULL); + T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function, + (void *)stksize), NULL); + T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL); + free((void *)(stackaddr - stksize)); +} + +T_DECL(stack_size_custom, "stack size of thread with custom stack addr+size", + T_META_ALL_VALID_ARCHS(YES)) +{ + static const size_t stksize = 768 * 1024; + pthread_t thread; + pthread_attr_t attr; + + uintptr_t stackaddr = (uintptr_t)valloc(stksize); + stackaddr += stksize; // address is top of stack + + T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), NULL); + T_ASSERT_POSIX_ZERO(pthread_attr_setstackaddr(&attr, (void *)stackaddr), + NULL); + T_ASSERT_POSIX_ZERO(pthread_attr_setstacksize(&attr, stksize), NULL); + T_ASSERT_POSIX_ZERO(pthread_create(&thread, &attr, function, + (void *)stksize), NULL); + T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL); + free((void *)(stackaddr - stksize)); +} diff --git a/tools/locktrace.lua b/tools/locktrace.lua index ecc64bc..bb5380d 100755 --- a/tools/locktrace.lua +++ b/tools/locktrace.lua @@ -28,10 +28,26 @@ get_prefix = function(buf) local proc proc = buf.command - return string.format("%s %6.9f %-17s [%05d.%06x] %-24s", + return string.format("%s %6.9f %-17s [%05d.%06x] %-35s", prefix, secs, proc, buf.pid, buf.threadid, buf.debugname) end +get_count = function(val) + return ((val & 0xffffff00) >> 8) +end + +get_kwq_type = function(val) + if val & 0xff == 0x1 then + return "MTX" + elseif val & 0xff == 0x2 then + return "CVAR" + elseif val & 0xff == 0x4 then + return "RWL" + else + return string.format("0x%04x", val) + end +end + decode_lval = function(lval) local kbit = " " if lval & 0x1 ~= 0 then @@ -61,61 +77,282 @@ decode_sval = function(sval) end local count = sval >> 8 - return string.format("[0x%06x, %s%s]", count, ibit, sbit) + return string.format("[0x%06x, %s%s]", count, ibit, sbit) +end + +decode_cv_sval = function(sval) + local sbit = " " + if sval & 0x1 ~= 0 then + sbit = "C" + end + local ibit = " " + if sval & 0x2 ~= 0 then + ibit = "P" + end + + local count = sval >> 8 + return 
string.format("[0x%06x, %s%s]", count, ibit, sbit) end trace_codename("psynch_mutex_lock_updatebits", function(buf) local prefix = get_prefix(buf) if buf[4] == 0 then - printf("%s\tupdated lock bits, pre-kernel (addr: 0x%016x, oldlval: %s, newlval: %s)\n", prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3])) + printf("%s\tupdated lock bits, pre-kernel\taddr: 0x%016x\toldl: %s\tnewl: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3])) else - printf("%s\tupdated lock bits, post-kernel (addr: 0x%016x, oldlval: %s, newlval: %s)\n", prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3])) + printf("%s\tupdated lock bits, post-kernel\taddr: 0x%016x\toldl: %s\tnewl: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3])) end end) trace_codename("psynch_mutex_unlock_updatebits", function(buf) local prefix = get_prefix(buf) - printf("%s\tupdated unlock bits (addr: 0x%016x, oldlval: %s, newlval: %s)\n", prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3])) + printf("%s\tupdated unlock bits\t\taddr: 0x%016x\toldl: %s\tnewl: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3])) +end) + +trace_codename("psynch_ffmutex_lock_updatebits", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tlock path, bits update\t\taddr: 0x%016x\toldl: %s\toldu: %s\twaiters: %d\n", + prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), get_count(buf[2]) - get_count(buf[3])) + else + printf("%s\tlock path, bits update\t\taddr: 0x%016x\tnewl: %s\tnewu: %s\twaiters: %d\n", + prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), get_count(buf[2]) - get_count(buf[3])) + end +end) + +trace_codename("psynch_ffmutex_unlock_updatebits", function(buf) + local prefix = get_prefix(buf) + printf("%s\tunlock path, update bits\taddr: 0x%016x\toldl: %s\tnewl: %s\tnewu: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), decode_sval(buf[4])) +end) + +trace_codename("psynch_ffmutex_wake", function(buf) + local prefix = get_prefix(buf) + printf("%s\tfirst fit kernel wake\t\taddr: 0x%016x\tlval: %s\tuval: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3])) +end) + +trace_codename("psynch_ffmutex_wait", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tfirstfit kernel wait\t\taddr: 0x%016x\tlval: %s\tuval: %s\tflags: 0x%x\n", + prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4]) + else + printf("%s\tfirstfit kernel wait\t\taddr: 0x%016x\trval: %s\n", + prefix, buf[1], decode_lval(buf[2])) + end end) trace_codename("psynch_mutex_ulock", function(buf) local prefix = get_prefix(buf) if trace.debugid_is_start(buf.debugid) then - printf("%s\tlock busy, waiting in kernel (addr: 0x%016x, lval: %s, sval: %s, owner_tid: 0x%x)\n", + printf("%s\tlock busy, waiting in kernel\taddr: 0x%016x\tlval: %s\tsval: %s\towner_tid: 0x%x\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4]) elseif trace.debugid_is_end(buf.debugid) then - printf("%s\tlock acquired from kernel (addr: 0x%016x, updated bits: %s)\n", + printf("%s\tlock acquired from kernel\taddr: 0x%016x\tupdt: %s\n", prefix, buf[1], decode_lval(buf[2])) else - printf("%s\tlock taken, uncontended (addr: 0x%016x, lval: %s, sval: %s)\n", + printf("%s\tlock taken userspace\t\taddr: 0x%016x\tlval: %s\tsval: %s\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3])) end end) trace_codename("psynch_mutex_utrylock_failed", function(buf) local prefix = get_prefix(buf) - 
printf("%s\tmutex trybusy addr: 0x%016x lval: %s sval: %s owner: 0x%x\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4]) + printf("%s\tmutex trybusy\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\towner: 0x%x\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4]) end) trace_codename("psynch_mutex_uunlock", function(buf) local prefix = get_prefix(buf) if trace.debugid_is_start(buf.debugid) then - printf("%s\tunlock, signalling kernel waiters (addr: 0x%016x, lval: %s, sval: %s, owner_tid: 0x%x)\n", + printf("%s\tunlock, signalling kernel\taddr: 0x%016x\tlval: %s\tsval: %s\towner_tid: 0x%x\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3]), buf[4]) elseif trace.debugid_is_end(buf.debugid) then - printf("%s\tunlock, waiters signalled (addr: 0x%016x, updated bits: %s)\n", + printf("%s\tunlock, waiters signalled\taddr: 0x%016x\tupdt: %s\n", prefix, buf[1], decode_lval(buf[2])) else - printf("%s\tunlock, no kernel waiters (addr: 0x%016x, lval: %s, sval: %s)\n", + printf("%s\tunlock, no kernel waiters\taddr: 0x%016x\tlval: %s\tsval: %s\n", prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3])) end end) --- The trace codes we need aren't enabled by default -darwin.sysctlbyname("kern.pthread_debug_tracing", 1) -completion_handler = function() - darwin.sysctlbyname("kern.pthread_debug_tracing", 0) -end -trace.set_completion_handler(completion_handler) +trace_codename("psynch_mutex_clearprepost", function(buf) + local prefix = get_prefix(buf) + printf("%s\tclear prepost\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3])) +end) + +trace_codename("psynch_mutex_markprepost", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tmark prepost\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_sval(buf[3])) + else + printf("%s\tmark prepost\t\t\taddr: 0x%016x\tcleared: %d\n", + prefix, buf[1], buf[2]) + end +end) + +trace_codename("psynch_mutex_kwqallocate", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tkernel kwq allocated\t\taddr: 0x%016x\ttype: %s\tkwq: 0x%016x\n", + prefix, buf[1], get_kwq_type(buf[2]), buf[3]) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tkernel kwq allocated\t\taddr: 0x%016x\tlval: %s\tuval: %s\tsval: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), decode_sval(buf[4])) + end +end) + +trace_codename("psynch_mutex_kwqdeallocate", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tkernel kwq deallocated\t\taddr: 0x%016x\ttype: %s\tfreenow: %d\n", + prefix, buf[1], get_kwq_type(buf[2]), buf[3]) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tkernel kwq deallocated\t\taddr: 0x%016x\tlval: %s\tuval: %s\tsval: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), decode_sval(buf[4])) + end +end) + +trace_codename("psynch_mutex_kwqprepost", function(buf) + local prefix = get_prefix(buf) + if buf[4] == 0 then + printf("%s\tkernel prepost incremented\taddr: 0x%016x\tlval: %s\tinqueue: %d\n", + prefix, buf[1], decode_lval(buf[2]), buf[3]) + elseif buf[4] == 1 then + printf("%s\tkernel prepost decremented\taddr: 0x%016x\tlval: %s\tremaining: %d\n", + prefix, buf[1], decode_lval(buf[2]), buf[3]) + elseif buf[4] == 2 then + printf("%s\tkernel prepost cleared\t\taddr: 0x%016x\tlval: %s\n", prefix, + buf[1], decode_lval(buf[2])) + end +end) + 
+trace_codename("psynch_mutex_kwqcollision", function(buf) + local prefix = get_prefix(buf) + printf("%s\tkernel kwq collision\t\taddr: 0x%016x\ttype: %d\n", prefix, + buf[1], buf[2]) +end) + +trace_codename("psynch_mutex_kwqsignal", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tkernel mutex signal\t\taddr: 0x%016x\tkwe: 0x%16x\ttid: 0x%x\tinqueue: %d\n", + prefix, buf[1], buf[2], buf[3], buf[4]); + else + printf("%s\tkernel mutex signal\t\taddr: 0x%016x\tkwe: 0x%16x\tret: 0x%x\n", + prefix, buf[1], buf[2], buf[3]); + end +end) + +trace_codename("psynch_mutex_kwqwait", function(buf) + local prefix = get_prefix(buf) + printf("%s\tkernel mutex wait\t\taddr: 0x%016x\tinqueue: %d\tprepost: %d\tintr: %d\n", + prefix, buf[1], buf[2], buf[3], buf[4]) +end) + +trace_codename("psynch_cvar_kwait", function(buf) + local prefix = get_prefix(buf) + if buf[4] == 0 then + printf("%s\tkernel condvar wait\t\taddr: 0x%016x\tmutex: 0x%016x\tcgen: 0x%x\n", + prefix, buf[1], buf[2], buf[3]) + elseif buf[4] == 1 then + printf("%s\tkernel condvar sleep\t\taddr: 0x%016x\tflags: 0x%x\n", + prefix, buf[1], buf[3]) + elseif buf[4] == 2 then + printf("%s\tkernel condvar wait return\taddr: 0x%016x\terror: 0x%x\tupdt: 0x%x\n", + prefix, buf[1], buf[2], buf[3]) + elseif buf[4] == 3 and (buf[2] & 0xff) == 60 then + printf("%s\tkernel condvar timeout\t\taddr: 0x%016x\terror: 0x%x\n", + prefix, buf[1], buf[2]) + elseif buf[4] == 3 then + printf("%s\tkernel condvar wait error\taddr: 0x%016x\terror: 0x%x\n", + prefix, buf[1], buf[2]) + elseif buf[4] == 4 then + printf("%s\tkernel condvar wait return\taddr: 0x%016x\tupdt: 0x%x\n", + prefix, buf[1], buf[2]) + end +end) + +trace_codename("psynch_cvar_clrprepost", function(buf) + local prefix = get_prefix(buf) + printf("%s\tkernel condvar clear prepost:\taddr: 0x%016x\ttype: 0x%x\tprepost seq: %s\n", + prefix, buf[1], buf[2], decode_lval(buf[3])) +end) + +trace_codename("psynch_cvar_freeitems", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tcvar free fake/prepost items\taddr: 0x%016x\ttype: %d\t\t\tupto: %s\tall: %d\n", + prefix, buf[1], buf[2], decode_lval(buf[3]), buf[4]) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tcvar free fake/prepost items\taddr: 0x%016x\tfreed: %d\tsignaled: %d\tinqueue: %d\n", + prefix, buf[1], buf[2], buf[3], buf[4]) + elseif buf[4] == 1 then + printf("%s\tcvar free, signalling waiter\taddr: 0x%016x\tinqueue: %d\tkwe: 0x%016x\n", + prefix, buf[1], buf[3], buf[2]) + elseif buf[4] == 2 then + printf("%s\tcvar free, removing fake\taddr: 0x%016x\tinqueue: %d\tkwe: 0x%016x\n", + prefix, buf[1], buf[3], buf[2]) + end +end) + +trace_codename("psynch_cvar_signal", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tkernel cvar signal\t\taddr: 0x%016x\tfrom: %s\tupto: %s\tbroad: %d\n", + prefix, buf[1], decode_lval(buf[2]), decode_lval(buf[3]), buf[4]) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tkernel cvar signal\t\taddr: 0x%016x\tupdt: %s\n", + prefix, buf[1], decode_cv_sval(buf[2])) + else + printf("%s\tkernel cvar signal\t\taddr: 0x%016x\tsignalled waiters (converted to broadcast: %d)\n", + prefix, buf[1], buf[2]) + end +end) + +trace_codename("psynch_cvar_broadcast", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tupto: %s\tinqueue: %d\n", + prefix, 
buf[1], decode_lval(buf[2]), buf[3]) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tupdt: %s\n", + prefix, buf[1], decode_lval(buf[2])) + elseif buf[4] == 1 then + printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tsignalling: 0x%16x\n", + prefix, buf[1], buf[2]) + elseif buf[4] == 2 then + printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tremoving fake: 0x%16x\tstate: %d\n", + prefix, buf[1], buf[2], buf[3]) + elseif buf[4] == 3 then + printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tprepost\tlval: %s\tsval: %s\n", + prefix, buf[1], decode_lval(buf[2]), decode_cv_sval(buf[3])) + elseif buf[4] == 4 then + printf("%s\tkernel cvar broadcast\t\taddr: 0x%016x\tbroadcast prepost: 0x%016x\n", + prefix, buf[1], buf[2]) + end +end) + +trace_codename("psynch_cvar_zeroed", function(buf) + local prefix = get_prefix(buf) + printf("%s\tkernel cvar zeroed\t\taddr: 0x%016x\tlval: %s\tsval: %s\tinqueue: %d\n", + prefix, buf[1], decode_lval(buf[2]), decode_cv_sval(buf[3]), buf[4]) +end) + +trace_codename("psynch_cvar_updateval", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tcvar updateval\t\t\taddr: 0x%016x\tlval: %s\tsval: %s\tupdateval: %s\n", + prefix, buf[1], decode_lval(buf[2] & 0xffffffff), decode_cv_sval(buf[2] >> 32), decode_cv_sval(buf[3])) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tcvar updateval (updated)\taddr: 0x%016x\tlval: %s\tsval: %s\tdiffgen: %d\tneedsclear: %d\n", + prefix, buf[1], decode_lval(buf[2] & 0xffffffff), decode_cv_sval(buf[2] >> 32), buf[3] >> 32, buf[3] & 0x1) + end +end) + diff --git a/tools/wqtrace.lua b/tools/wqtrace.lua deleted file mode 100755 index 2da03da..0000000 --- a/tools/wqtrace.lua +++ /dev/null @@ -1,349 +0,0 @@ -#!/usr/local/bin/luatrace -s - -trace_codename = function(codename, callback) - local debugid = trace.debugid(codename) - if debugid ~= 0 then - trace.single(debugid,callback) - else - printf("WARNING: Cannot locate debugid for '%s'\n", codename) - end -end - -initial_timestamp = 0 -workqueue_ptr_map = {}; -get_prefix = function(buf) - if initial_timestamp == 0 then - initial_timestamp = buf.timestamp - end - local secs = trace.convert_timestamp_to_nanoseconds(buf.timestamp - initial_timestamp) / 1000000000 - - local prefix - if trace.debugid_is_start(buf.debugid) then - prefix = "→" - elseif trace.debugid_is_end(buf.debugid) then - prefix = "←" - else - prefix = "↔" - end - - local proc - if buf.command ~= "kernel_task" then - proc = buf.command - workqueue_ptr_map[buf[1]] = buf.command - elseif workqueue_ptr_map[buf[1]] ~= nil then - proc = workqueue_ptr_map[buf[1]] - else - proc = "UNKNOWN" - end - - return string.format("%s %6.9f %-17s [%05d.%06x] %-24s", - prefix, secs, proc, buf.pid, buf.threadid, buf.debugname) -end - -parse_pthread_priority = function(pri) - pri = pri & 0xffffffff - if (pri & 0x02000000) == 0x02000000 then - return "Manager" - end - local qos = (pri & 0x00ffff00) >> 8 - if qos == 0x20 then - return string.format("UI[%x]", pri); - elseif qos == 0x10 then - return string.format("IN[%x]", pri); - elseif qos == 0x08 then - return string.format("DF[%x]", pri); - elseif qos == 0x04 then - return string.format("UT[%x]", pri); - elseif qos == 0x02 then - return string.format("BG[%x]", pri); - elseif qos == 0x01 then - return string.format("MT[%x]", pri); - elseif qos == 0x00 then - return string.format("--[%x]", pri); - else - return string.format("??[%x]", pri); - end -end - -parse_qos_bucket = 
function(pri) - if pri == 0 then - return string.format("UI[%x]", pri); - elseif pri == 1 then - return string.format("IN[%x]", pri); - elseif pri == 2 then - return string.format("DF[%x]", pri); - elseif pri == 3 then - return string.format("UT[%x]", pri); - elseif pri == 4 then - return string.format("BG[%x]", pri); - elseif pri == 5 then - return string.format("MT[%x]", pri); - elseif pri == 6 then - return string.format("MG[%x]", pri); - else - return string.format("??[%x]", pri); - end -end - -parse_thactive_req_bucket = function(pri) - if pri ~= 6 then - return parse_qos_bucket(pri) - end - return "None" -end - -get_thactive = function(low, high) - return string.format("req: %s, MG: %d, UI: %d, IN: %d, DE: %d, UT: %d, BG: %d, MT: %d", - parse_thactive_req_bucket(high >> (16 * 3)), (high >> (2 * 16)) & 0xffff, - (low >> (0 * 16)) & 0xffff, (low >> (1 * 16)) & 0xffff, - (low >> (2 * 16)) & 0xffff, (low >> (3 * 16)) & 0xffff, - (high >> (0 * 16)) & 0xffff, (high >> (1 * 16)) & 0xffff) -end - --- workqueue lifecycle - -trace_codename("wq_pthread_exit", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - printf("%s\tprocess is exiting\n",prefix) - else - printf("%s\tworkqueue marked as exiting and timer is complete\n",prefix) - end -end) - -trace_codename("wq_workqueue_exit", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - printf("%s\tall threads have exited, cleaning up\n",prefix) - else - printf("%s\tclean up complete\n",prefix) - end -end) - -trace_codename("wq_start_add_timer", function(buf) - local prefix = get_prefix(buf) - printf("%s\tarming timer to fire in %d us (flags: %x, reqcount: %d)\n", - prefix, buf.arg4, buf.arg3, buf.arg2) -end) - -trace_codename("wq_add_timer", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - printf("%s\tadd_timer fired (flags: %x, nthreads: %d, thidlecount: %d)\n", - prefix, buf.arg2, buf.arg3, buf.arg4) - elseif trace.debugid_is_end(buf.debugid) then - printf("%s\tadd_timer completed (start_timer: %x, nthreads: %d, thidlecount: %d)\n", - prefix, buf.arg2, buf.arg3, buf.arg4) - else - printf("%s\tadd_timer added threads (reqcount: %d, thidlecount: %d, busycount: %d)\n", - prefix, buf.arg2, buf.arg3, buf.arg4) - - end -end) - -trace_codename("wq_run_threadreq", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - if buf[2] > 0 then - printf("%s\trun_threadreq: %x (priority: %s, flags: %d) on %x\n", - prefix, buf[2], parse_qos_bucket(buf[4] >> 16), buf[4] & 0xff, buf[3]) - else - printf("%s\trun_threadreq: on %x\n", - prefix, buf[3]) - end - else - if buf[2] == 1 then - printf("%s\tpended event manager, already running\n", prefix) - elseif buf[2] == 2 then - printf("%s\tnothing to do\n", prefix) - elseif buf[2] == 3 then - printf("%s\tno eligible request found\n", prefix) - elseif buf[2] == 4 then - printf("%s\tadmission control failed\n", prefix) - elseif buf[2] == 5 then - printf("%s\tunable to add new thread (may_add_new_thread: %d, nthreads: %d)\n", prefix, buf[3], buf[4]) - elseif buf[2] == 6 then - printf("%s\tthread creation failed\n", prefix) - elseif buf[2] == 0 then - printf("%s\tsuccess\n", prefix) - else - printf("%s\tWARNING: UNKNOWN END CODE:%d\n", prefix, buf.arg4) - end - end -end) - -trace_codename("wq_run_threadreq_mgr_merge", function(buf) - local prefix = get_prefix(buf) - printf("%s\t\tmerging incoming manager request into existing\n", prefix) -end) - 
-trace_codename("wq_run_threadreq_req_select", function(buf) - local prefix = get_prefix(buf) - if buf[3] == 1 then - printf("%s\t\tselected event manager request %x\n", prefix, buf[2]) - elseif buf[3] == 2 then - printf("%s\t\tselected overcommit request %x\n", prefix, buf[2]) - elseif buf[3] == 3 then - printf("%s\t\tselected constrained request %x\n", prefix, buf[2]) - else - printf("%s\t\tWARNING: UNKNOWN DECISION CODE:%d\n", prefix, buf.arg[3]) - end -end) - -trace_codename("wq_run_threadreq_thread_select", function(buf) - local prefix = get_prefix(buf) - if buf[2] == 1 then - printf("%s\t\trunning on current thread %x\n", prefix, buf[3]) - elseif buf[2] == 2 then - printf("%s\t\trunning on idle thread %x\n", prefix, buf[3]) - elseif buf[2] == 3 then - printf("%s\t\tcreated new thread\n", prefix) - else - printf("%s\t\tWARNING: UNKNOWN DECISION CODE:%d\n", prefix, buf.arg[2]) - end -end) - -trace_codename("wq_thread_reset_priority", function(buf) - local prefix = get_prefix(buf) - local old_qos = buf[3] >> 16; - local new_qos = buf[3] & 0xff; - if buf[4] == 1 then - printf("%s\t\treset priority of %x from %s to %s\n", prefix, buf[2], parse_qos_bucket(old_qos), parse_qos_bucket(new_qos)) - elseif buf[4] == 2 then - printf("%s\t\treset priority of %x from %s to %s for reserve manager\n", prefix, buf[2], parse_qos_bucket(old_qos), parse_qos_bucket(new_qos)) - elseif buf[4] == 3 then - printf("%s\t\treset priority of %x from %s to %s for cleanup\n", prefix, buf[2], parse_qos_bucket(old_qos), parse_qos_bucket(new_qos)) - end -end) - -trace_codename("wq_thread_park", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - printf("%s\tthread parking\n", prefix) - else - printf("%s\tthread woken\n", prefix) - end -end) - -trace_codename("wq_thread_squash", function(buf) - local prefix = get_prefix(buf) - printf("%s\tthread squashed from %s to %s\n", prefix, - parse_qos_bucket(buf[2]), parse_qos_bucket(buf[3])) -end) - -trace.enable_thread_cputime() -runitem_time_map = {} -runitem_cputime_map = {} -trace_codename("wq_runitem", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - runitem_time_map[buf.threadid] = buf.timestamp; - runitem_cputime_map[buf.threadid] = trace.cputime_for_thread(buf.threadid); - - printf("%s\tSTART running item @ %s\n", prefix, parse_qos_bucket(buf[3])) - elseif runitem_time_map[buf.threadid] then - local time = buf.timestamp - runitem_time_map[buf.threadid] - local cputime = trace.cputime_for_thread(buf.threadid) - runitem_cputime_map[buf.threadid] - - local time_ms = trace.convert_timestamp_to_nanoseconds(time) / 1000000 - local cputime_ms = trace.convert_timestamp_to_nanoseconds(cputime) / 1000000 - - printf("%s\tDONE running item @ %s: time = %6.6f ms, cputime = %6.6f ms\n", - prefix, parse_qos_bucket(buf[2]), time_ms, cputime_ms) - - runitem_time_map[buf.threadid] = 0 - runitem_cputime_map[buf.threadid] = 0 - else - printf("%s\tDONE running item @ %s\n", prefix, parse_qos_bucket(buf[2])) - end -end) - -trace_codename("wq_runthread", function(buf) - local prefix = get_prefix(buf) - if trace.debugid_is_start(buf.debugid) then - printf("%s\tSTART running thread\n", prefix) - elseif trace.debugid_is_end(buf.debugid) then - printf("%s\tDONE running thread\n", prefix) - end -end) - -trace_codename("wq_thactive_update", function(buf) - local prefix = get_prefix(buf) - local thactive = get_thactive(buf[2], buf[3]) - if buf[1] == 1 then - printf("%s\tthactive constrained pre-post (%s)\n", 
prefix, thactive) - elseif buf[1] == 2 then - printf("%s\tthactive constrained run (%s)\n", prefix, thactive) - else - return - end -end) - -trace_codename("wq_thread_block", function(buf) - local prefix = get_prefix(buf) - local req_pri = parse_thactive_req_bucket(buf[3] >> 8) - if trace.debugid_is_start(buf.debugid) then - printf("%s\tthread blocked (activecount: %d, priority: %s, req_pri: %s, reqcount: %d, start_timer: %d)\n", - prefix, buf[2], parse_qos_bucket(buf[3] & 0xff), req_pri, buf[4] >> 1, buf[4] & 0x1) - else - printf("%s\tthread unblocked (activecount: %d, priority: %s, req_pri: %s, threads_scheduled: %d)\n", - prefix, buf[2], parse_qos_bucket(buf[3] & 0xff), req_pri, buf[4]) - end -end) - -trace_codename("wq_thread_create_failed", function(buf) - local prefix = get_prefix(buf) - if buf[3] == 0 then - printf("%s\tfailed to create new workqueue thread, kern_return: 0x%x\n", - prefix, buf[2]) - elseif buf[3] == 1 then - printf("%s\tfailed to vm_map workq thread stack: 0x%x\n", prefix, buf[2]) - elseif buf[3] == 2 then - printf("%s\tfailed to vm_protect workq thread guardsize: 0x%x\n", prefix, buf[2]) - end -end) - -trace_codename("wq_thread_create", function(buf) - printf("%s\tcreated new workqueue thread\n", get_prefix(buf)) -end) - -trace_codename("wq_wqops_reqthreads", function(buf) - local prefix = get_prefix(buf) - printf("%s\tuserspace requested %d threads at %s\n", prefix, buf[2], parse_pthread_priority(buf[3])); -end) - -trace_codename("wq_kevent_reqthreads", function(buf) - local prefix = get_prefix(buf) - if buf[4] == 0 then - printf("%s\tkevent requested a thread at %s\n", prefix, parse_pthread_priority(buf[3])); - elseif buf[4] == 1 then - printf("%s\tworkloop requested a thread for req %x at %s\n", prefix, buf[2], parse_pthread_priority(buf[3])); - elseif buf[4] == 2 then - printf("%s\tworkloop updated priority of req %x to %s\n", prefix, buf[2], parse_pthread_priority(buf[3])); - elseif buf[4] == 3 then - printf("%s\tworkloop canceled req %x\n", prefix, buf[2], parse_pthread_priority(buf[3])); - elseif buf[4] == 4 then - printf("%s\tworkloop redrove a thread request\n", prefix); - end -end) - -trace_codename("wq_constrained_admission", function(buf) - local prefix = get_prefix(buf) - if buf[2] == 1 then - printf("fail: %s\twq_constrained_threads_scheduled=%d >= wq_max_constrained_threads=%d\n", - prefix, buf[3], buf[4]) - elseif (buf[2] == 2) or (buf[2] == 3) then - local success = nil; - if buf[2] == 2 then success = "success" - else success = "fail" end - printf("%s: %s\tthactive_count=%d + busycount=%d >= wq->wq_max_concurrency\n", - prefix, success, buf[3], buf[4]) - end -end) - --- The trace codes we need aren't enabled by default -darwin.sysctlbyname("kern.pthread_debug_tracing", 1) -completion_handler = function() - darwin.sysctlbyname("kern.pthread_debug_tracing", 0) -end -trace.set_completion_handler(completion_handler) diff --git a/xcodescripts/install-lldbmacros.sh b/xcodescripts/install-lldbmacros.sh index e50ee44..9501f96 100644 --- a/xcodescripts/install-lldbmacros.sh +++ b/xcodescripts/install-lldbmacros.sh @@ -2,4 +2,17 @@ # install the pthread lldbmacros into the module mkdir -p $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python || true -rsync -aq $SRCROOT/lldbmacros/* $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python/ +rsync -aq $SRCROOT/lldbmacros/* $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python + +for variant in $BUILD_VARIANTS; do + case $variant in + normal) + SUFFIX="" + ;; 
+ *) + SUFFIX="_$variant" + ;; + esac + + ln -sf init.py $DWARF_DSYM_FOLDER_PATH/$DWARF_DSYM_FILE_NAME/Contents/Resources/Python/$EXECUTABLE_NAME$SUFFIX.py +done diff --git a/xcodescripts/kext.xcconfig b/xcodescripts/kext.xcconfig index fcd42ea..84e9079 100644 --- a/xcodescripts/kext.xcconfig +++ b/xcodescripts/kext.xcconfig @@ -41,7 +41,7 @@ LLVM_LTO_development = NO LLVM_LTO_kasan = NO LLVM_LTO = $(LLVM_LTO_$(PTHREAD_VARIANT)) -GCC_PREPROCESSOR_DEFINITIONS_kext = XNU_KERNEL_PRIVATE MACH_KERNEL_PRIVATE ABSOLUTETIME_SCALAR_TYPE NEEDS_SCHED_CALL_T +GCC_PREPROCESSOR_DEFINITIONS_kext = XNU_KERNEL_PRIVATE MACH_KERNEL_PRIVATE ABSOLUTETIME_SCALAR_TYPE NEEDS_SCHED_CALL_T __PTHREAD_EXPOSE_INTERNALS__ GCC_PREPROCESSOR_DEFINITIONS_kext_development = MACH_ASSERT DEBUG GCC_PREPROCESSOR_DEFINITIONS = $(GCC_PREPROCESSOR_DEFINITIONS_kext) $(GCC_PREPROCESSOR_DEFINITIONS_kext_$(PTHREAD_VARIANT)) diff --git a/xcodescripts/pthread.dirty b/xcodescripts/pthread.dirty new file mode 100644 index 0000000..2a8f66e --- /dev/null +++ b/xcodescripts/pthread.dirty @@ -0,0 +1,33 @@ +# cacheline-aligned + +# uint64_t sized +___pthread_stack_hint + +# pointer-sized +___libdispatch_keventfunction +___libdispatch_workerfunction +___libdispatch_workloopfunction +___pthread_head +__main_thread_ptr +__pthread_free +__pthread_keys +__pthread_malloc +__pthread_ptr_munge_token +_exitf + +# int-sized +___is_threaded +___libdispatch_offset +___pthread_supported_features +___pthread_tsd_lock +___pthread_tsd_max +___unix_conforming +__main_qos +__pthread_count +__pthread_list_lock + +# byte-sized +___workq_newapi +_default_priority +_max_priority +_min_priority diff --git a/xcodescripts/pthread.xcconfig b/xcodescripts/pthread.xcconfig index 7b2f244..1dedcaa 100644 --- a/xcodescripts/pthread.xcconfig +++ b/xcodescripts/pthread.xcconfig @@ -57,7 +57,7 @@ DISABLED_WARNING_CFLAGS = -Wno-int-conversion -Wno-missing-prototypes -Wno-sign- WARNING_CFLAGS = -Wall -Wextra -Warray-bounds-pointer-arithmetic -Wcomma -Wconditional-uninitialized -Wcovered-switch-default -Wdate-time -Wdeprecated -Wdouble-promotion -Wduplicate-enum -Wfloat-equal -Widiomatic-parentheses -Wignored-qualifiers -Wimplicit-fallthrough -Wmissing-noreturn -Wnullable-to-nonnull-conversion -Wover-aligned -Wpointer-arith -Wstatic-in-inline -Wtautological-compare -Wunguarded-availability -Wunused $(NO_WARNING_CFLAGS) $(DISABLED_WARNING_CFLAGS) NO_WARNING_CFLAGS = -Wno-pedantic -Wno-bad-function-cast -Wno-c++98-compat-pedantic -Wno-cast-align -Wno-cast-qual -Wno-disabled-macro-expansion -Wno-documentation-unknown-command -Wno-format-nonliteral -Wno-missing-variable-declarations -Wno-packed -Wno-padded -Wno-reserved-id-macro -Wno-switch-enum -Wno-undef -Wno-unreachable-code-aggressive -Wno-unused-macros -Wno-used-but-marked-unused -BASE_PREPROCESSOR_MACROS = __LIBC__ __DARWIN_UNIX03=1 __DARWIN_64_BIT_INO_T=1 __DARWIN_NON_CANCELABLE=1 __DARWIN_VERS_1050=1 _FORTIFY_SOURCE=0 __PTHREAD_BUILDING_PTHREAD__=1 $(SIM_PREPROCESSOR_MACROS) +BASE_PREPROCESSOR_MACROS = __LIBC__ __DARWIN_UNIX03=1 __DARWIN_64_BIT_INO_T=1 __DARWIN_NON_CANCELABLE=1 __DARWIN_VERS_1050=1 _FORTIFY_SOURCE=0 __PTHREAD_BUILDING_PTHREAD__=1 $(SIM_PREPROCESSOR_MACROS) __PTHREAD_EXPOSE_INTERNALS__ GCC_PREPROCESSOR_DEFINITIONS = $(BASE_PREPROCESSOR_MACROS) $(PLATFORM_PREPROCESSOR_DEFINITIONS) // TODO: Remove -fstack-protector on _debug when it is moved to libplatform @@ -68,8 +68,10 @@ OTHER_CFLAGS_debug = -fno-inline -O0 -DDEBUG=1 LINK_WITH_STANDARD_LIBRARIES = NO DYLIB_CURRENT_VERSION = $(RC_ProjectSourceVersion) 
DYLIB_COMPATIBILITY_VERSION = 1 +DIRTY_LDFLAGS = -Wl,-dirty_data_list,$(SRCROOT)/xcodescripts/pthread.dirty +DIRTY_LDFLAGS[sdk=macos*] = DYLIB_LDFLAGS = -Wl,-alias_list,$(SRCROOT)/xcodescripts/pthread.aliases -Wl,-umbrella,System -L/usr/lib/system -lsystem_kernel -lsystem_platform -ldyld -lcompiler_rt -OTHER_LDFLAGS = $(DYLIB_LDFLAGS) $(CR_LDFLAGS) $(PLATFORM_LDFLAGS) +OTHER_LDFLAGS = $(DYLIB_LDFLAGS) $(DIRTY_LDFLAGS) $(CR_LDFLAGS) $(PLATFORM_LDFLAGS) // Simulator build rules EXCLUDED_SOURCE_FILE_NAMES[sdk=iphonesimulator*] = *.c *.s diff --git a/xcodescripts/resolved.xcconfig b/xcodescripts/resolved.xcconfig index 2b33118..863252a 100644 --- a/xcodescripts/resolved.xcconfig +++ b/xcodescripts/resolved.xcconfig @@ -1,6 +1,6 @@ #include "pthread.xcconfig" -SUPPORTED_PLATFORMS = iphoneos appletvos watchos +SUPPORTED_PLATFORMS = iphoneos PRODUCT_NAME = pthread_$(RESOLVED_VARIANT) OTHER_LDFLAGS = SKIP_INSTALL = YES -- 2.45.2
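
The tests/stack.c addition above exercises pthread_stack_frame_decode_np(): given a frame pointer, it returns the caller's frame pointer and writes out the return address saved in that frame. A minimal standalone sketch of the same walk follows; the <pthread/stack_np.h> include path and the frame != 0 chain-termination check are assumptions of this sketch, not taken from the patch.

#include <stdio.h>
#include <stdint.h>
#include <pthread/stack_np.h>	/* assumed install path for the header declaring the API */

int
main(void)
{
	/* Start from this function's own frame pointer. */
	uintptr_t frame = (uintptr_t)__builtin_frame_address(0);
	uintptr_t ret = 0;

	/* Each call returns the caller's frame pointer and stores the return
	   address recorded in the given frame, as the test above asserts. */
	for (int i = 0; i < 2 && frame != 0; i++) {
		frame = pthread_stack_frame_decode_np(frame, &ret);
		printf("caller frame: 0x%lx, return address: 0x%lx\n",
		    (unsigned long)frame, (unsigned long)ret);
	}
	return 0;
}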