From 964d3577b041867f776d8eb940bf4a1108ffb97c Mon Sep 17 00:00:00 2001 From: Apple Date: Fri, 4 Sep 2015 17:29:03 +0000 Subject: [PATCH] libpthread-137.1.1.tar.gz --- kern/kern_init.c | 3 + kern/kern_internal.h | 23 +- kern/kern_policy.c | 21 +- kern/kern_support.c | 1873 ++++++++++++------- kern/kern_synch.c | 1 - kern/kern_trace.h | 8 + kern/workqueue_internal.h | 41 +- libpthread.xcodeproj/project.pbxproj | 384 +++- lldbmacros/pthread.py | 20 +- man/pthread_main_np.3 | 57 + os/atomic.h | 381 ++++ os/atomic_llsc.h | 33 + private/qos_private.h | 23 + private/workqueue_private.h | 15 +- pthread/pthread.h | 1 - pthread/pthread_spis.h | 1 + src/internal.h | 41 +- src/pthread.c | 206 +- src/pthread_asm.s | 9 + src/pthread_cond.c | 39 +- src/pthread_mutex.c | 455 +++-- src/pthread_mutex_up.c | 30 + src/pthread_rwlock.c | 40 +- src/pthread_support.c | 14 +- src/pthread_tsd.c | 103 +- src/qos.c | 16 +- src/resolver.c | 54 + src/resolver.h | 39 + src/resolver_internal.h | 50 + tests/Makefile | 20 + tests/atfork.c | 75 + tests/cond.c | 114 ++ tests/cond_timed.c | 115 ++ tests/custom_stack.c | 38 + tests/join.c | 95 + tests/maxwidth.c | 97 + tests/mutex.c | 87 + tests/once.c | 61 + tests/qos.c | 135 ++ tests/rwlock-signal.c | 152 ++ tests/rwlock.c | 98 + tests/tsd.c | 55 + tests/wq_block_handoff.c | 60 + tests/wq_event_manager.c | 95 + tests/wq_kevent.c | 108 ++ tests/wq_kevent.h | 59 + tests/wq_kevent_stress.c | 111 ++ tools/wqtrace.lua | 279 +++ xcodescripts/install-manpages.sh | 1 + xcodescripts/install-symlinks.sh | 1 - xcodescripts/install-sys-headers.sh | 1 - xcodescripts/kext.xcconfig | 6 +- xcodescripts/kext_debug.xcconfig | 4 + xcodescripts/pthread.xcconfig | 26 +- xcodescripts/pthread_debug.xcconfig | 4 + xcodescripts/pthread_introspection.xcconfig | 8 + 56 files changed, 4921 insertions(+), 965 deletions(-) create mode 100644 man/pthread_main_np.3 create mode 100644 os/atomic.h create mode 100644 os/atomic_llsc.h create mode 100644 src/pthread_mutex_up.c create mode 100644 src/resolver.c create mode 100644 src/resolver.h create mode 100644 src/resolver_internal.h create mode 100644 tests/Makefile create mode 100644 tests/atfork.c create mode 100644 tests/cond.c create mode 100644 tests/cond_timed.c create mode 100644 tests/custom_stack.c create mode 100644 tests/join.c create mode 100644 tests/maxwidth.c create mode 100644 tests/mutex.c create mode 100644 tests/once.c create mode 100644 tests/qos.c create mode 100644 tests/rwlock-signal.c create mode 100644 tests/rwlock.c create mode 100644 tests/tsd.c create mode 100644 tests/wq_block_handoff.c create mode 100644 tests/wq_event_manager.c create mode 100644 tests/wq_kevent.c create mode 100644 tests/wq_kevent.h create mode 100644 tests/wq_kevent_stress.c create mode 100755 tools/wqtrace.lua create mode 100644 xcodescripts/kext_debug.xcconfig create mode 100644 xcodescripts/pthread_debug.xcconfig create mode 100644 xcodescripts/pthread_introspection.xcconfig diff --git a/kern/kern_init.c b/kern/kern_init.c index 8b4b60a..1f7e547 100644 --- a/kern/kern_init.c +++ b/kern/kern_init.c @@ -45,6 +45,9 @@ const struct pthread_functions_s pthread_internal_functions = { .psynch_rw_unlock = _psynch_rw_unlock, .psynch_rw_wrlock = _psynch_rw_wrlock, .psynch_rw_yieldwrlock = _psynch_rw_yieldwrlock, + + .workq_reqthreads = _workq_reqthreads, + .thread_qos_from_pthread_priority = _thread_qos_from_pthread_priority, }; kern_return_t pthread_start(__unused kmod_info_t * ki, __unused void *d) diff --git a/kern/kern_internal.h b/kern/kern_internal.h index 
aefb66b..187c81f 100644 --- a/kern/kern_internal.h +++ b/kern/kern_internal.h @@ -50,6 +50,7 @@ #define PTHREAD_FEATURE_BSDTHREADCTL 0x04 /* is the bsdthread_ctl syscall available */ #define PTHREAD_FEATURE_SETSELF 0x08 /* is the BSDTHREAD_CTL_SET_SELF command of bsdthread_ctl available */ #define PTHREAD_FEATURE_QOS_MAINTENANCE 0x10 /* is QOS_CLASS_MAINTENANCE available */ +#define PTHREAD_FEATURE_KEVENT 0x20 /* supports direct kevent delivery */ #define PTHREAD_FEATURE_QOS_DEFAULT 0x40000000 /* the kernel supports QOS_CLASS_DEFAULT */ /* pthread bsdthread_ctl sysctl commands */ @@ -222,7 +223,8 @@ struct _pthread_registration_data { PTHREAD_FEATURE_BSDTHREADCTL | \ PTHREAD_FEATURE_SETSELF | \ PTHREAD_FEATURE_QOS_MAINTENANCE | \ - PTHREAD_FEATURE_QOS_DEFAULT) + PTHREAD_FEATURE_QOS_DEFAULT | \ + PTHREAD_FEATURE_KEVENT ) extern pthread_callbacks_t pthread_kern; @@ -238,10 +240,11 @@ struct ksyn_waitq_element { }; typedef struct ksyn_waitq_element * ksyn_waitq_element_t; -pthread_priority_t pthread_qos_class_get_priority(int qos); -int pthread_priority_get_qos_class(pthread_priority_t priority); -int pthread_priority_get_class_index(pthread_priority_t priority); -pthread_priority_t pthread_priority_from_class_index(int index); +pthread_priority_t pthread_qos_class_get_priority(int qos) __attribute__((const)); +int pthread_priority_get_qos_class(pthread_priority_t priority) __attribute__((const)); +int pthread_priority_get_class_index(pthread_priority_t priority) __attribute__((const)); +int qos_get_class_index(int qos) __attribute__((const)); +pthread_priority_t pthread_priority_from_class_index(int index) __attribute__((const)); #define PTH_DEFAULT_STACKSIZE 512*1024 #define MAX_PTHREAD_SIZE 64*1024 @@ -302,6 +305,16 @@ extern thread_call_t psynch_thcall; struct uthread* current_uthread(void); +// Call for the kernel's kevent system to request threads. A list of QoS/event +// counts should be provided, sorted by flags and then QoS class. If the +// identity of the thread to handle the request is known, it will be returned. +// If a new thread must be created, NULL will be returned. +thread_t _workq_reqthreads(struct proc *p, int requests_count, + workq_reqthreads_req_t requests); + +// Resolve a pthread_priority_t to a QoS/relative pri +integer_t _thread_qos_from_pthread_priority(unsigned long pri, unsigned long *flags); + #endif // KERNEL #endif /* _SYS_PTHREAD_INTERNAL_H_ */ diff --git a/kern/kern_policy.c b/kern/kern_policy.c index 23fb0d0..a168bdb 100644 --- a/kern/kern_policy.c +++ b/kern/kern_policy.c @@ -28,6 +28,7 @@ #include "kern_internal.h" #include +#include pthread_priority_t pthread_qos_class_get_priority(int qos) @@ -70,6 +71,7 @@ pthread_priority_from_class_index(int index) case 3: qos = QOS_CLASS_UTILITY; break; case 4: qos = QOS_CLASS_BACKGROUND; break; case 5: qos = QOS_CLASS_MAINTENANCE; break; + case 6: assert(index != 6); // EVENT_MANAGER should be handled specially default: /* Return the utility band if we don't understand the input. 
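 * This mapping is dense (0 = USER_INTERACTIVE ... 5 = MAINTENANCE) and is
 * the inverse of qos_get_class_index() below; a hypothetical debug-build
 * sanity check (not part of this patch) could assert the round trip:
 *
 *   for (int i = 0; i < 6; i++)
 *       assert(pthread_priority_get_class_index(
 *                  pthread_priority_from_class_index(i)) == i);
 *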
*/ qos = QOS_CLASS_UTILITY; @@ -82,9 +84,8 @@ pthread_priority_from_class_index(int index) } int -pthread_priority_get_class_index(pthread_priority_t priority) -{ - switch (_pthread_priority_get_qos_newest(priority)) { +qos_get_class_index(int qos){ + switch (qos){ case QOS_CLASS_USER_INTERACTIVE: return 0; case QOS_CLASS_USER_INITIATED: return 1; case QOS_CLASS_DEFAULT: return 2; @@ -96,3 +97,17 @@ pthread_priority_get_class_index(pthread_priority_t priority) return 2; } } + +int +pthread_priority_get_class_index(pthread_priority_t priority) +{ + return qos_get_class_index(_pthread_priority_get_qos_newest(priority)); +} + +integer_t +_thread_qos_from_pthread_priority(unsigned long priority, unsigned long *flags){ + if (flags){ + *flags = (int)_pthread_priority_get_flags(priority) >> _PTHREAD_PRIORITY_FLAGS_SHIFT; + } + return pthread_priority_get_qos_class(priority); +} diff --git a/kern/kern_support.c b/kern/kern_support.c index cdd2ed0..a081689 100644 --- a/kern/kern_support.c +++ b/kern/kern_support.c @@ -30,6 +30,8 @@ * pthread_synch.c */ +#pragma mark - Front Matter + #define _PTHREAD_CONDATTR_T #define _PTHREAD_COND_T #define _PTHREAD_MUTEXATTR_T @@ -66,7 +68,6 @@ #include /* for coredump */ #include /* for fill_procworkqueue */ - #include #include #include @@ -94,13 +95,44 @@ #include #include /* for thread_resume */ #include +#include #include #include #include "kern_internal.h" -uint32_t pthread_debug_tracing = 0; +#if DEBUG +#define kevent_qos_internal kevent_qos_internal_stub +static int kevent_qos_internal_stub(__unused struct proc *p, __unused int fd, + __unused user_addr_t changelist, __unused int nchanges, + __unused user_addr_t eventlist, __unused int nevents, + __unused user_addr_t data_out, user_size_t *data_available, + __unused unsigned int flags, int32_t *retval){ + if (data_available){ + static int i = 0; + switch (i++ % 4) { + case 0: + case 2: + *data_available = *data_available / 2; + *retval = 4; + break; + case 1: + *data_available = 0; + *retval = 4; + break; + case 3: + *retval = 0; + break; + } + } else { + *retval = 0; + } + return 0; +} +#endif /* DEBUG */ + +uint32_t pthread_debug_tracing = 1; SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED, &pthread_debug_tracing, 0, "") @@ -115,24 +147,28 @@ lck_attr_t *pthread_lck_attr; extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64); extern void workqueue_thread_yielded(void); -static boolean_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th, boolean_t force_oc, - boolean_t overcommit, pthread_priority_t oc_prio); +enum run_nextreq_mode {RUN_NEXTREQ_DEFAULT, RUN_NEXTREQ_OVERCOMMIT, RUN_NEXTREQ_DEFERRED_OVERCOMMIT, RUN_NEXTREQ_UNCONSTRAINED, RUN_NEXTREQ_EVENT_MANAGER}; +static boolean_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th, enum run_nextreq_mode mode, pthread_priority_t oc_prio); static boolean_t workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority); -static void wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t th, struct threadlist *tl, +static void wq_runreq(proc_t p, pthread_priority_t priority, thread_t th, struct threadlist *tl, int reuse_thread, int wake_thread, int return_directly); -static int _setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t priority, int reuse_thread, struct threadlist *tl); +static int _setup_wqthread(proc_t p, thread_t th, pthread_priority_t priority, int reuse_thread, struct 
threadlist *tl); static void wq_unpark_continue(void); static void wq_unsuspend_continue(void); -static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread); +static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t ignore_constrained_thread_limit); static void workqueue_removethread(struct threadlist *tl, int fromexit); static void workqueue_lock_spin(proc_t); static void workqueue_unlock(proc_t); +static boolean_t may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass, uint32_t my_priclass, boolean_t *start_timer); + +static mach_vm_offset_t stackaddr_hint(proc_t p); + int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc); int proc_setalltargetconc(pid_t pid, int32_t * targetconcp); @@ -143,8 +179,8 @@ int proc_setalltargetconc(pid_t pid, int32_t * targetconcp); #define C_32_STK_ALIGN 16 #define C_64_STK_ALIGN 16 #define C_64_REDZONE_LEN 128 -#define TRUNC_DOWN32(a,c) ((((uint32_t)a)-(c)) & ((uint32_t)(-(c)))) -#define TRUNC_DOWN64(a,c) ((((uint64_t)a)-(c)) & ((uint64_t)(-(c)))) + +#define PTHREAD_T_OFFSET 0 /* * Flags filed passed to bsdthread_create and back in pthread_start @@ -167,6 +203,39 @@ _________________________________________ #define SCHED_FIFO POLICY_FIFO #define SCHED_RR POLICY_RR +#define BASEPRI_DEFAULT 31 + +#pragma mark - Process/Thread Setup/Teardown syscalls + +static mach_vm_offset_t stackaddr_hint(proc_t p __unused){ + mach_vm_offset_t stackaddr; +#if defined(__i386__) || defined(__x86_64__) + if (proc_is64bit(p)){ + // Above nanomalloc range (see NANOZONE_SIGNATURE) + stackaddr = 0x700000000000; + } else { + stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386; + } +#elif defined(__arm__) || defined(__arm64__) + if (proc_is64bit(p)){ + // 64 stacks below nanomalloc (see NANOZONE_SIGNATURE) + stackaddr = 0x170000000 - 64 * PTH_DEFAULT_STACKSIZE; +#if defined(__arm__) + } else if (pthread_kern->map_is_1gb(get_task_map(pthread_kern->proc_get_task(p)))){ + stackaddr = SHARED_REGION_BASE_ARM - 32 * PTH_DEFAULT_STACKSIZE; +#endif + } else { + stackaddr = SHARED_REGION_BASE_ARM + SHARED_REGION_SIZE_ARM; + } +#else +#error Need to define a stack address hint for this architecture +#endif + return stackaddr; +} + +/** + * bsdthread_create system call. Used by pthread_create. 
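+ *
+ * A hedged sketch of the userspace side (argument roles read off this
+ * handler's signature; the stub name __bsdthread_create is an assumption):
+ *
+ *   pth = __bsdthread_create(func, funcarg, stack_or_size, pthread_ptr, flags);
+ *
+ * When PTHREAD_START_CUSTOM is clear, user_stack carries the requested stack
+ * size and the kernel allocates guard page + stack + pthread_t as one mapping;
+ * otherwise user_stack/user_pthread point at caller-supplied memory.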
+ */ int _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval) { @@ -176,10 +245,7 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar int allocated = 0; mach_vm_offset_t stackaddr; mach_vm_size_t th_allocsize = 0; - mach_vm_size_t user_stacksize; - mach_vm_size_t th_stacksize; mach_vm_size_t th_guardsize; - mach_vm_offset_t th_stackaddr; mach_vm_offset_t th_stack; mach_vm_offset_t th_pthread; mach_port_name_t th_thport; @@ -199,11 +265,7 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar isLP64 = proc_is64bit(p); th_guardsize = vm_map_page_size(vmap); -#if defined(__i386__) || defined(__x86_64__) - stackaddr = 0xB0000000; -#else -#error Need to define a stack address hint for this architecture -#endif + stackaddr = stackaddr_hint(p); kret = pthread_kern->thread_create(ctask, &th); if (kret != KERN_SUCCESS) return(ENOMEM); @@ -212,61 +274,66 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar sright = (void *)pthread_kern->convert_thread_to_port(th); th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask)); - if ((flags & PTHREAD_START_CUSTOM) == 0) { - th_stacksize = (mach_vm_size_t)user_stack; /* if it is custom them it is stacksize */ - th_allocsize = th_stacksize + th_guardsize + pthread_kern->proc_get_pthsize(p); - - kret = mach_vm_map(vmap, &stackaddr, - th_allocsize, - page_size-1, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL, - 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, - VM_INHERIT_DEFAULT); - if (kret != KERN_SUCCESS) - kret = mach_vm_allocate(vmap, - &stackaddr, th_allocsize, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE); - if (kret != KERN_SUCCESS) { + if ((flags & PTHREAD_START_CUSTOM) == 0) { + mach_vm_size_t pthread_size = + vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(vmap)); + th_allocsize = th_guardsize + user_stack + pthread_size; + user_stack += PTHREAD_T_OFFSET; + + kret = mach_vm_map(vmap, &stackaddr, + th_allocsize, + page_size-1, + VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL, + 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, + VM_INHERIT_DEFAULT); + if (kret != KERN_SUCCESS){ + kret = mach_vm_allocate(vmap, + &stackaddr, th_allocsize, + VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE); + } + if (kret != KERN_SUCCESS) { error = ENOMEM; goto out; - } + } PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0); - th_stackaddr = stackaddr; allocated = 1; - /* + /* * The guard page is at the lowest address - * The stack base is the highest address + * The stack base is the highest address */ kret = mach_vm_protect(vmap, stackaddr, th_guardsize, FALSE, VM_PROT_NONE); - if (kret != KERN_SUCCESS) { + if (kret != KERN_SUCCESS) { error = ENOMEM; goto out1; - } - th_stack = (stackaddr + th_stacksize + th_guardsize); - th_pthread = (stackaddr + th_stacksize + th_guardsize); - user_stacksize = th_stacksize; + } + + th_pthread = stackaddr + th_guardsize + user_stack; + th_stack = th_pthread; - /* + /* * Pre-fault the first page of the new thread's stack and the page that will * contain the pthread_t structure. 
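 * Taking these faults here, while the thread is still being set up, means the
 * new thread does not fault on its first stores; note the page-mask
 * comparison below skips the stack-page fault when the redzone-adjusted
 * stack top and the pthread_t land on the same page.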
*/ - vm_fault( vmap, - vm_map_trunc_page_mask(th_stack - PAGE_SIZE_64, vm_map_page_mask(vmap)), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); + if (vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) != + vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap))){ + vm_fault( vmap, + vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)), + VM_PROT_READ | VM_PROT_WRITE, + FALSE, + THREAD_UNINT, NULL, 0); + } vm_fault( vmap, - vm_map_trunc_page_mask(th_pthread, vm_map_page_mask(vmap)), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); + vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap)), + VM_PROT_READ | VM_PROT_WRITE, + FALSE, + THREAD_UNINT, NULL, 0); + } else { th_stack = user_stack; - user_stacksize = user_stack; th_pthread = user_pthread; PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0); @@ -277,43 +344,41 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar * Set up i386 registers & function call. */ if (isLP64 == 0) { - x86_thread_state32_t state; - x86_thread_state32_t *ts = &state; - - ts->eip = (unsigned int)pthread_kern->proc_get_threadstart(p); - ts->eax = (unsigned int)th_pthread; - ts->ebx = (unsigned int)th_thport; - ts->ecx = (unsigned int)user_func; - ts->edx = (unsigned int)user_funcarg; - ts->edi = (unsigned int)user_stacksize; - ts->esi = (unsigned int)flags; - /* - * set stack pointer - */ - ts->esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN)); + x86_thread_state32_t state = { + .eip = (unsigned int)pthread_kern->proc_get_threadstart(p), + .eax = (unsigned int)th_pthread, + .ebx = (unsigned int)th_thport, + .ecx = (unsigned int)user_func, + .edx = (unsigned int)user_funcarg, + .edi = (unsigned int)user_stack, + .esi = (unsigned int)flags, + /* + * set stack pointer + */ + .esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN)) + }; - error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts); + error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); if (error != KERN_SUCCESS) { error = EINVAL; goto out; } } else { - x86_thread_state64_t state64; - x86_thread_state64_t *ts64 = &state64; - - ts64->rip = (uint64_t)pthread_kern->proc_get_threadstart(p); - ts64->rdi = (uint64_t)th_pthread; - ts64->rsi = (uint64_t)(th_thport); - ts64->rdx = (uint64_t)user_func; - ts64->rcx = (uint64_t)user_funcarg; - ts64->r8 = (uint64_t)user_stacksize; - ts64->r9 = (uint64_t)flags; - /* - * set stack pointer aligned to 16 byte boundary - */ - ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN); + x86_thread_state64_t state64 = { + .rip = (uint64_t)pthread_kern->proc_get_threadstart(p), + .rdi = (uint64_t)th_pthread, + .rsi = (uint64_t)(th_thport), + .rdx = (uint64_t)user_func, + .rcx = (uint64_t)user_funcarg, + .r8 = (uint64_t)user_stack, + .r9 = (uint64_t)flags, + /* + * set stack pointer aligned to 16 byte boundary + */ + .rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN) + }; - error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)ts64); + error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64); if (error != KERN_SUCCESS) { error = EINVAL; goto out; @@ -321,27 +386,26 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar } #elif defined(__arm__) - arm_thread_state_t state; - arm_thread_state_t *ts = &state; - - ts->pc = (int)pthread_kern->proc_get_threadstart(p); - ts->r[0] = (unsigned 
int)th_pthread; - ts->r[1] = (unsigned int)th_thport; - ts->r[2] = (unsigned int)user_func; - ts->r[3] = (unsigned int)user_funcarg; - ts->r[4] = (unsigned int)user_stacksize; - ts->r[5] = (unsigned int)flags; - - /* Set r7 & lr to 0 for better back tracing */ - ts->r[7] = 0; - ts->lr = 0; - - /* - * set stack pointer - */ - ts->sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN)); + arm_thread_state_t state = { + .pc = (int)pthread_kern->proc_get_threadstart(p), + .r[0] = (unsigned int)th_pthread, + .r[1] = (unsigned int)th_thport, + .r[2] = (unsigned int)user_func, + .r[3] = (unsigned int)user_funcarg, + .r[4] = (unsigned int)user_stack, + .r[5] = (unsigned int)flags, + + /* Set r7 & lr to 0 for better back tracing */ + .r[7] = 0, + .lr = 0, + + /* + * set stack pointer + */ + .sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN)) + }; - (void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts); + (void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); #else #error bsdthread_create not defined for this architecture @@ -363,7 +427,6 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT); -#define BASEPRI_DEFAULT 31 precedinfo.importance = (importance - BASEPRI_DEFAULT); thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT); } else if ((flags & PTHREAD_START_QOSCLASS) != 0) { @@ -387,13 +450,14 @@ _bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcar PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0); - *retval = th_pthread; + // cast required as mach_vm_offset_t is always 64 bits even on 32-bit platforms + *retval = (user_addr_t)th_pthread; return(0); out1: if (allocated != 0) { - (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize); + (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize); } out: (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport); @@ -402,6 +466,9 @@ out: return(error); } +/** + * bsdthread_terminate system call. Used by pthread_terminate. + */ int _bsdthread_terminate(__unused struct proc *p, user_addr_t stackaddr, @@ -450,6 +517,10 @@ _bsdthread_terminate(__unused struct proc *p, return(0); } +/** + * bsdthread_register system call. Performs per-process setup. Responsible for + * returning capability bits to userspace and receiving userspace function addresses. + */ int _bsdthread_register(struct proc *p, user_addr_t threadstart, @@ -526,6 +597,8 @@ _bsdthread_register(struct proc *p, return(0); } +#pragma mark - QoS Manipulation + int _bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval) { @@ -667,7 +740,7 @@ _bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priori */ if (old_active == wq->wq_reqconc[old_bucket]) { /* workqueue_run_nextreq will drop the workqueue lock in all exit paths.
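 * (So no further access to wq is legal on this path without retaking the
 * workqueue lock; the callers below restate the same contract.)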
*/ - (void)workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0); + (void)workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_DEFAULT, 0); } else { workqueue_unlock(p); } @@ -687,14 +760,28 @@ voucher: fixedpri: if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) { - thread_extended_policy_data_t extpol; + thread_extended_policy_data_t extpol = {.timeshare = 0}; thread_t thread = current_thread(); - extpol.timeshare = 0; + struct threadlist *tl = util_get_thread_threadlist_entry(thread); + if (tl) { + /* Not allowed on workqueue threads */ + fixedpri_rv = ENOTSUP; + goto done; + } + + kr = pthread_kern->thread_policy_set_internal(thread, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT); + if (kr != KERN_SUCCESS) { + fixedpri_rv = EINVAL; + goto done; + } + } else if ((flags & _PTHREAD_SET_SELF_TIMESHARE_FLAG) != 0) { + thread_extended_policy_data_t extpol = {.timeshare = 1}; + thread_t thread = current_thread(); struct threadlist *tl = util_get_thread_threadlist_entry(thread); if (tl) { - /* Not allowed on workqueue threads, since there is no symmetric clear function */ + /* Not allowed on workqueue threads */ fixedpri_rv = ENOTSUP; goto done; } @@ -875,6 +962,9 @@ _bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t ar } } +#pragma mark - Workqueue Implementation +#pragma mark sysctls + uint32_t wq_yielded_threshold = WQ_YIELDED_THRESHOLD; uint32_t wq_yielded_window_usecs = WQ_YIELDED_WINDOW_USECS; uint32_t wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS; @@ -882,7 +972,7 @@ uint32_t wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS; uint32_t wq_max_timer_interval_usecs = WQ_MAX_TIMER_INTERVAL_USECS; uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS; uint32_t wq_max_constrained_threads = WORKQUEUE_MAXTHREADS / 8; - +uint32_t wq_max_concurrency = 1; // set to ncpus on load SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &wq_yielded_threshold, 0, ""); @@ -905,9 +995,17 @@ SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED, SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED, &wq_max_constrained_threads, 0, ""); +#ifdef DEBUG +SYSCTL_INT(_kern, OID_AUTO, wq_max_concurrency, CTLFLAG_RW | CTLFLAG_LOCKED, + &wq_max_concurrency, 0, ""); + +static int wq_kevent_test SYSCTL_HANDLER_ARGS; +SYSCTL_PROC(_debug, OID_AUTO, wq_kevent_test, CTLFLAG_MASKED | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLTYPE_OPAQUE, NULL, 0, wq_kevent_test, 0, "-"); +#endif static uint32_t wq_init_constrained_limit = 1; +#pragma mark workqueue lock void _workqueue_init_lock(proc_t p) @@ -935,12 +1033,20 @@ workqueue_unlock(proc_t p) lck_spin_unlock(pthread_kern->proc_get_wqlockptr(p)); } +#pragma mark workqueue add timer +/** + * Sets up the timer which will call out to workqueue_add_timer + */ static void workqueue_interval_timer_start(struct workqueue *wq) { uint64_t deadline; + /* n.b. wq_timer_interval is reset to 0 in workqueue_add_timer if the + ATIMER_RUNNING flag is not present. The net effect here is that if a + sequence of threads is required, we'll double the time before we give out + the next one. 
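   For example (symbolic, using the tunables wired up to the sysctls above):
   a burst that needs several threads with none idle would see the timer
   rearm at roughly w, then 2w, then 4w, where w = wq_stalled_window_usecs,
   until the requests drain and the interval resets to 0.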
*/ if (wq->wq_timer_interval == 0) { wq->wq_timer_interval = wq_stalled_window_usecs; @@ -958,7 +1064,9 @@ workqueue_interval_timer_start(struct workqueue *wq) PTHREAD_TRACE(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, wq->wq_flags, wq->wq_timer_interval, 0); } - +/** + * returns whether lastblocked_tsp is within wq_stalled_window_usecs of cur_ts + */ static boolean_t wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp) { @@ -1001,7 +1109,6 @@ wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp) return (FALSE); } - #define WQ_TIMER_NEEDED(wq, start_timer) do { \ int oldflags = wq->wq_flags; \ \ @@ -1011,17 +1118,16 @@ wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp) } \ } while (0) - - +/** + * handler function for the timer + */ static void workqueue_add_timer(struct workqueue *wq, __unused int param1) { proc_t p; boolean_t start_timer = FALSE; boolean_t retval; - boolean_t add_thread; - uint32_t busycount; - + PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0); p = wq->wq_proc; @@ -1067,9 +1173,8 @@ workqueue_add_timer(struct workqueue *wq, __unused int param1) again: retval = TRUE; - add_thread = FALSE; - if ( !(wq->wq_flags & WQ_EXITING)) { + boolean_t add_thread = FALSE; /* * check to see if the stall frequency was beyond our tolerance * or we have work on the queue, but haven't scheduled any * there were no idle threads left to schedule */ if (wq->wq_reqcount) { - uint32_t priclass; - uint32_t thactive_count; - uint32_t i; - uint64_t curtime; - - for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) { - if (wq->wq_requests[priclass]) - break; + uint32_t priclass = 0; + uint32_t thactive_count = 0; + uint64_t curtime = mach_absolute_time(); + uint64_t busycount = 0; + + if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] && + wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){ + priclass = WORKQUEUE_EVENT_MANAGER_BUCKET; + } else { + for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) { + if (wq->wq_requests[priclass]) + break; + } } - assert(priclass < WORKQUEUE_NUM_BUCKETS); - curtime = mach_absolute_time(); - busycount = 0; - thactive_count = 0; - - /* - * check for conditions under which we would not add a thread, either - * a) we've got as many running threads as we want in this priority - * band and the priority bands above it - * - * b) check to see if the priority group has blocked threads, if the - * last blocked timestamp is old enough, we will have already passed - * (a) where we would have stopped if we had enough active threads. - */ - for (i = 0; i <= priclass; i++) { - - thactive_count += wq->wq_thactive_count[i]; + if (priclass < WORKQUEUE_EVENT_MANAGER_BUCKET){ + /* + * Compute a metric for how many threads are active. We + * find the highest priority request outstanding and then add up + * the number of active threads in that and all higher-priority + * buckets. We'll also add any "busy" threads which are not + * active but blocked recently enough that we can't be sure + * they've gone idle yet. We'll then compare this metric to our + * max concurrency to decide whether to add a new thread. + */ + for (uint32_t i = 0; i <= priclass; i++) { + thactive_count += wq->wq_thactive_count[i]; - if (wq->wq_thscheduled_count[i]) { - if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i])) - busycount++; + // XXX why isn't this checking thscheduled_count < thactive_count ?
+ if (wq->wq_thscheduled_count[i]) { + if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i])) + busycount++; + } } } - if (thactive_count + busycount < wq->wq_max_concurrency) { + + if (thactive_count + busycount < wq->wq_max_concurrency || + priclass == WORKQUEUE_EVENT_MANAGER_BUCKET) { if (wq->wq_thidlecount == 0) { /* * if we have no idle threads, try to add one */ - retval = workqueue_addnewthread(wq, FALSE); + retval = workqueue_addnewthread(wq, priclass == WORKQUEUE_EVENT_MANAGER_BUCKET); } add_thread = TRUE; } @@ -1131,7 +1240,7 @@ again: * workqueue_run_nextreq is responsible for * dropping the workqueue lock in all cases */ - retval = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0); + retval = workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_DEFAULT, 0); workqueue_lock_spin(p); if (retval == FALSE) @@ -1150,6 +1259,12 @@ again: } } } + + /* + * If we called WQ_TIMER_NEEDED above, then this flag will be set if that + * call marked the timer running. If so, we let the timer interval grow. + * Otherwise, we reset it back to 0. + */ if ( !(wq->wq_flags & WQ_ATIMER_RUNNING)) wq->wq_timer_interval = 0; @@ -1168,11 +1283,13 @@ again: workqueue_unlock(p); - if (start_timer == TRUE) - workqueue_interval_timer_start(wq); + if (start_timer == TRUE) + workqueue_interval_timer_start(wq); } +#pragma mark thread state tracking +// called by spinlock code when trying to yield to lock owner void _workqueue_thread_yielded(void) { @@ -1226,27 +1343,7 @@ _workqueue_thread_yielded(void) } } if (wq->wq_thidlecount) { - uint32_t priority; - boolean_t overcommit = FALSE; - boolean_t force_oc = FALSE; - - for (priority = 0; priority < WORKQUEUE_NUM_BUCKETS; priority++) { - if (wq->wq_requests[priority]) { - break; - } - } - assert(priority < WORKQUEUE_NUM_BUCKETS); - - wq->wq_reqcount--; - wq->wq_requests[priority]--; - - if (wq->wq_ocrequests[priority]) { - wq->wq_ocrequests[priority]--; - overcommit = TRUE; - } else - force_oc = TRUE; - - (void)workqueue_run_nextreq(p, wq, THREAD_NULL, force_oc, overcommit, pthread_priority_from_class_index(priority)); + (void)workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_UNCONSTRAINED, 0); /* * workqueue_run_nextreq is responsible for * dropping the workqueue lock in all cases @@ -1281,7 +1378,13 @@ workqueue_callback(int type, thread_t thread) old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]); - if (old_activecount == wq->wq_reqconc[tl->th_priority]) { + /* + * If we blocked and were at the requested concurrency previously, we may + * need to spin up a new thread. Of course, if it's the event manager + * then that's moot, so ignore that case. + */ + if (old_activecount == wq->wq_reqconc[tl->th_priority] && + tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET) { uint64_t curtime; UInt64 *lastblocked_ptr; @@ -1341,6 +1444,11 @@ _workqueue_get_sched_callback(void) return workqueue_callback; } +#pragma mark thread addition/removal + +/** + * pop goes the thread + */ static void workqueue_removethread(struct threadlist *tl, int fromexit) { @@ -1404,13 +1512,15 @@ workqueue_removethread(struct threadlist *tl, int fromexit) } -/* - * called with workq lock held - * dropped and retaken around thread creation - * return with workq lock held +/** + * Try to add a new workqueue thread. 
+ * + * - called with workq lock held + * - dropped and retaken around thread creation + * - return with workq lock held */ static boolean_t -workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread) +workqueue_addnewthread(struct workqueue *wq, boolean_t ignore_constrained_thread_limit) { struct threadlist *tl; struct uthread *uth; @@ -1419,25 +1529,32 @@ workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread) proc_t p; void *sright; mach_vm_offset_t stackaddr; - mach_vm_size_t guardsize; - if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING) + if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING) { + PTHREAD_TRACE(TRACE_wq_thread_add_during_exit | DBG_FUNC_NONE, wq, 0, 0, 0, 0); return (FALSE); + } if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (pthread_kern->config_thread_max - 20)) { wq->wq_lflags |= WQL_EXCEEDED_TOTAL_THREAD_LIMIT; + + PTHREAD_TRACE(TRACE_wq_thread_limit_exceeded | DBG_FUNC_NONE, wq, wq->wq_nthreads, wq_max_threads, + pthread_kern->config_thread_max - 20, 0); return (FALSE); } wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT; - if (oc_thread == FALSE && wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { - /* - * if we're not creating this thread to service an overcommit request, - * then check the size of the constrained thread pool... if we've already - * reached our max for threads scheduled from this pool, don't create a new - * one... the callers of this function are prepared for failure. + if (ignore_constrained_thread_limit == FALSE && + wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { + /* + * If we're not creating this thread to service an overcommit or + * event manager request, then we check to see if we are over our + * constrained thread limit, in which case we error out. 
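+	 * Callers are prepared for that failure: the request stays counted in
+	 * wq_requests[] and the add-timer path retries once a constrained
+	 * thread returns to the kernel.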
*/ wq->wq_lflags |= WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT; + + PTHREAD_TRACE(TRACE_wq_thread_constrained_maxed | DBG_FUNC_NONE, wq, wq->wq_constrained_threads_scheduled, + wq_max_constrained_threads, 0, 0); return (FALSE); } if (wq->wq_constrained_threads_scheduled < wq_max_constrained_threads) @@ -1450,20 +1567,19 @@ workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread) kret = pthread_kern->thread_create_workq(wq->wq_task, (thread_continue_t)wq_unsuspend_continue, &th); if (kret != KERN_SUCCESS) { + PTHREAD_TRACE(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 0, 0, 0); goto failed; } tl = kalloc(sizeof(struct threadlist)); bzero(tl, sizeof(struct threadlist)); -#if defined(__i386__) || defined(__x86_64__) - stackaddr = 0xB0000000; -#else -#error Need to define a stack address hint for this architecture -#endif - - guardsize = vm_map_page_size(wq->wq_map); - tl->th_allocsize = PTH_DEFAULT_STACKSIZE + guardsize + pthread_kern->proc_get_pthsize(p); + stackaddr = stackaddr_hint(p); + + mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map); + mach_vm_size_t pthread_size = + vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map)); + tl->th_allocsize = guardsize + PTH_DEFAULT_STACKSIZE + pthread_size; kret = mach_vm_map(wq->wq_map, &stackaddr, tl->th_allocsize, @@ -1473,19 +1589,23 @@ workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread) VM_INHERIT_DEFAULT); if (kret != KERN_SUCCESS) { - kret = mach_vm_allocate(wq->wq_map, - &stackaddr, tl->th_allocsize, - VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE); + PTHREAD_TRACE(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 1, 0, 0); + + kret = mach_vm_allocate(wq->wq_map, + &stackaddr, tl->th_allocsize, + VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE); } if (kret == KERN_SUCCESS) { - /* + /* * The guard page is at the lowest address * The stack base is the highest address */ - kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE); + kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE); - if (kret != KERN_SUCCESS) - (void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize); + if (kret != KERN_SUCCESS) { + (void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize); + PTHREAD_TRACE(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 2, 0, 0); + } } if (kret != KERN_SUCCESS) { (void) thread_terminate(th); @@ -1529,7 +1649,9 @@ failed: return (FALSE); } - +/** + * Setup per-process state for the workqueue. 
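+ *
+ * Reached via the workq_open() syscall the first time a process brings up
+ * the workqueue subsystem; bucket concurrencies and the event manager's
+ * starting priority are seeded here, before any thread requests arrive.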
+ */ int _workq_open(struct proc *p, __unused int32_t *retval) { @@ -1590,13 +1712,20 @@ _workq_open(struct proc *p, __unused int32_t *retval) wq = (struct workqueue *)ptr; wq->wq_flags = WQ_LIST_INITED; wq->wq_proc = p; - wq->wq_max_concurrency = num_cpus; + wq->wq_max_concurrency = wq_max_concurrency; wq->wq_task = current_task(); wq->wq_map = pthread_kern->current_map(); for (i = 0; i < WORKQUEUE_NUM_BUCKETS; i++) wq->wq_reqconc[i] = (uint16_t)wq->wq_max_concurrency; + // The event manager bucket is special, so it gets a concurrency of 1 + // though we shouldn't ever read this value for that bucket + wq->wq_reqconc[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1; + + // Always start the event manager at BACKGROUND + wq->wq_event_manager_priority = (uint32_t)pthread_qos_class_get_priority(THREAD_QOS_BACKGROUND) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; + TAILQ_INIT(&wq->wq_thrunlist); TAILQ_INIT(&wq->wq_thidlelist); @@ -1619,142 +1748,11 @@ out: return(error); } - -int -_workq_kernreturn(struct proc *p, - int options, - __unused user_addr_t item, - int arg2, - int arg3, - __unused int32_t *retval) -{ - struct workqueue *wq; - int error = 0; - - if (pthread_kern->proc_get_register(p) == 0) { - return EINVAL; - } - - switch (options) { - case WQOPS_QUEUE_NEWSPISUPP: { - /* - * arg2 = offset of serialno into dispatch queue - */ - int offset = arg2; - - pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset); - break; - } - case WQOPS_QUEUE_REQTHREADS: { - /* - * arg2 = number of threads to start - * arg3 = priority - */ - boolean_t overcommit = FALSE; - int reqcount = arg2; - pthread_priority_t priority = arg3; - int class; - - overcommit = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0; - class = pthread_priority_get_class_index(priority); - - if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS)) { - error = EINVAL; - break; - } - - workqueue_lock_spin(p); - - if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) { - workqueue_unlock(p); - - error = EINVAL; - break; - } - - if (!overcommit) { - wq->wq_reqcount += reqcount; - wq->wq_requests[class] += reqcount; - - PTHREAD_TRACE(TRACE_wq_req_threads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0); - - while (wq->wq_reqcount) { - if (!workqueue_run_one(p, wq, overcommit, priority)) - break; - } - } else { - PTHREAD_TRACE(TRACE_wq_req_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0); - - while (reqcount) { - if (!workqueue_run_one(p, wq, overcommit, priority)) - break; - reqcount--; - } - if (reqcount) { - /* - * we need to delay starting some of the overcommit requests... - * we should only fail to create the overcommit threads if - * we're at the max thread limit...
as existing threads - * return to the kernel, we'll notice the ocrequests - * and spin them back to user space as the overcommit variety - */ - wq->wq_reqcount += reqcount; - wq->wq_requests[class] += reqcount; - wq->wq_ocrequests[class] += reqcount; - - PTHREAD_TRACE(TRACE_wq_delay_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0); - } - } - workqueue_unlock(p); - break; - } - - case WQOPS_THREAD_RETURN: { - thread_t th = current_thread(); - struct uthread *uth = pthread_kern->get_bsdthread_info(th); - struct threadlist *tl = util_get_thread_threadlist_entry(th); - - /* reset signal mask on the workqueue thread to default state */ - if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) { - pthread_kern->proc_lock(p); - pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask); - pthread_kern->proc_unlock(p); - } - - /* dropping WQ override counts has to be done outside the wq lock. */ - wq_thread_override_reset(th, THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD); - - workqueue_lock_spin(p); - - if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL || !tl) { - workqueue_unlock(p); - - error = EINVAL; - break; - } - PTHREAD_TRACE(TRACE_wq_runitem | DBG_FUNC_END, wq, 0, 0, 0, 0); - - - (void)workqueue_run_nextreq(p, wq, th, FALSE, FALSE, 0); - /* - * workqueue_run_nextreq is responsible for - * dropping the workqueue lock in all cases - */ - break; - } - - default: - error = EINVAL; - break; - } - return (error); -} - /* * Routine: workqueue_mark_exiting * * Function: Mark the work queue such that new threads will not be added to the - * work queue after we return. + * work queue after we return. * * Conditions: Called against the current process. */ @@ -1771,7 +1769,7 @@ _workqueue_mark_exiting(struct proc *p) /* * we now arm the timer in the callback function w/o holding the workq lock... - * we do this by setting WQ_ATIMER_RUNNING via OSCompareAndSwap in order to + * we do this by setting WQ_ATIMER_RUNNING via OSCompareAndSwap in order to * insure only a single timer if running and to notice that WQ_EXITING has * been set (we don't want to start a timer once WQ_EXITING is posted) * @@ -1860,149 +1858,470 @@ _workqueue_exit(struct proc *p) } -static boolean_t -workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority) -{ - boolean_t ran_one; - - if (wq->wq_thidlecount == 0) { - if (overcommit == FALSE) { - if (wq->wq_constrained_threads_scheduled < wq->wq_max_concurrency) - workqueue_addnewthread(wq, overcommit); - } else { - workqueue_addnewthread(wq, overcommit); - - if (wq->wq_thidlecount == 0) - return (FALSE); - } - } - ran_one = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, overcommit, priority); - /* - * workqueue_run_nextreq is responsible for - * dropping the workqueue lock in all cases - */ - workqueue_lock_spin(p); +#pragma mark workqueue thread manipulation - return (ran_one); -} +/** + * Entry point for libdispatch to ask for threads + */ +static int wqops_queue_reqthreads(struct proc *p, int reqcount, pthread_priority_t priority){ + struct workqueue *wq; + boolean_t overcommit = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0; + int class = pthread_priority_get_class_index(priority); + boolean_t event_manager = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) != 0; + if (event_manager){ + class = WORKQUEUE_EVENT_MANAGER_BUCKET; + } -/* - * workqueue_run_nextreq: - * called with the workqueue lock held... 
- * responsible for dropping it in all cases - */ -static boolean_t -workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t thread, - boolean_t force_oc, boolean_t overcommit, pthread_priority_t oc_prio) -{ - thread_t th_to_run = THREAD_NULL; - thread_t th_to_park = THREAD_NULL; - int wake_thread = 0; - int reuse_thread = WQ_FLAG_THREAD_REUSE; - uint32_t priclass, orig_class; - uint32_t us_to_wait; - struct threadlist *tl = NULL; - struct uthread *uth = NULL; + if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS) || (overcommit && event_manager)) { + return EINVAL; + } + + workqueue_lock_spin(p); + + if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) { + workqueue_unlock(p); + + return EINVAL; + } + + if (overcommit == 0 && event_manager == 0) { + wq->wq_reqcount += reqcount; + wq->wq_requests[class] += reqcount; + + PTHREAD_TRACE(TRACE_wq_req_threads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0); + + while (wq->wq_reqcount) { + if (!workqueue_run_one(p, wq, overcommit, 0)) + break; + } + } else if (overcommit){ + PTHREAD_TRACE(TRACE_wq_req_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_ocrequests[class], reqcount, 0); + + while (reqcount) { + if (!workqueue_run_one(p, wq, overcommit, priority)) + break; + reqcount--; + } + if (reqcount) { + /* + * we need to delay starting some of the overcommit requests... + * we should only fail to create the overcommit threads if + * we're at the max thread limit... as existing threads + * return to the kernel, we'll notice the ocrequests + * and spin them back to user space as the overcommit variety + */ + wq->wq_reqcount += reqcount; + wq->wq_requests[class] += reqcount; + wq->wq_ocrequests[class] += reqcount; + + PTHREAD_TRACE(TRACE_wq_delay_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_ocrequests[class], reqcount, 0); + + /* if we delayed this thread coming up but we're not constrained + * or at max threads then we need to start the timer so we don't + * risk dropping this request on the floor. + */ + if ((wq->wq_lflags & (WQL_EXCEEDED_TOTAL_THREAD_LIMIT | WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT)) == 0) { + boolean_t start_timer = FALSE; + WQ_TIMER_NEEDED(wq, start_timer); + + if (start_timer) { + workqueue_interval_timer_start(wq); + } + } + } + } else if (event_manager) { + PTHREAD_TRACE(TRACE_wq_req_event_manager | DBG_FUNC_NONE, wq, wq->wq_event_manager_priority, wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET], wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET], 0); + + if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){ + wq->wq_reqcount += 1; + wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1; + } + + // We've recorded the request for an event manager thread above. We'll + // let the timer pick it up as we would for a kernel callout. We can + // do a direct add/wakeup when that support is added for the kevent path. + boolean_t start_timer = FALSE; + if (wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0) + WQ_TIMER_NEEDED(wq, start_timer); + if (start_timer == TRUE) + workqueue_interval_timer_start(wq); + } + workqueue_unlock(p); + + return 0; +} + +/* Used by the kevent system to request threads. Currently count is ignored + * and we always return one thread per invocation. 
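+ *
+ * A hedged example of the calling contract (field names match the uses
+ * below; the element type behind workq_reqthreads_req_t and the two
+ * priority constants are placeholders for the real xnu definitions).
+ * Entries are sorted as the DEBUG assertion below checks, by flags and
+ * then by descending QoS:
+ *
+ *   requests[0].priority = OC_UI_PRIO;  requests[0].count = 1; // overcommit
+ *   requests[1].priority = CON_UT_PRIO; requests[1].count = 2; // constrained
+ *   thread_t th = _workq_reqthreads(p, 2, requests); // THREAD_NULL for now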
+ */ +thread_t _workq_reqthreads(struct proc *p, int requests_count, workq_reqthreads_req_t requests){ boolean_t start_timer = FALSE; - boolean_t adjust_counters = TRUE; - uint64_t curtime; - uint32_t thactive_count; - uint32_t busycount; + assert(requests_count > 0); + +#if DEBUG + // Make sure that the requests array is sorted, highest priority first + if (requests_count > 1){ + __assert_only qos_class_t priority = _pthread_priority_get_qos_newest(requests[0].priority); + __assert_only unsigned long flags = ((_pthread_priority_get_flags(requests[0].priority) & (_PTHREAD_PRIORITY_OVERCOMMIT_FLAG|_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)) != 0); + for (int i = 1; i < requests_count; i++){ + if (requests[i].count == 0) continue; + __assert_only qos_class_t next_priority = _pthread_priority_get_qos_newest(requests[i].priority); + __assert_only unsigned long next_flags = ((_pthread_priority_get_flags(requests[i].priority) & (_PTHREAD_PRIORITY_OVERCOMMIT_FLAG|_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)) != 0); + if (next_flags != flags){ + flags = next_flags; + priority = next_priority; + } else { + assert(next_priority <= priority); + } + } + } +#endif // DEBUG - PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_reqcount, 0); + int error = 0; + struct workqueue *wq; - if (thread != THREAD_NULL) { - uth = pthread_kern->get_bsdthread_info(thread); + workqueue_lock_spin(p); - if ((tl = pthread_kern->uthread_get_threadlist(uth)) == NULL) { - panic("wq thread with no threadlist"); + if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) { + error = EINVAL; + goto done; + } + + PTHREAD_TRACE(TRACE_wq_kevent_req_threads | DBG_FUNC_START, wq, requests_count, 0, 0, 0); + + // Look for overcommit or event-manager-only requests. + boolean_t have_overcommit = FALSE; + pthread_priority_t priority = 0; + for (int i = 0; i < requests_count; i++){ + if (requests[i].count == 0) + continue; + priority = requests[i].priority; + if (_pthread_priority_get_qos_newest(priority) == QOS_CLASS_UNSPECIFIED){ + priority |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; + } + if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) != 0){ + goto event_manager; + } + if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){ + have_overcommit = TRUE; + break; } } + if (have_overcommit){ + // I can't make this call, since it's not safe from some contexts yet, + // so just setup a delayed overcommit and let the timer do the work + //boolean_t success = workqueue_run_one(p, wq, TRUE, priority); + if (/* !success */ TRUE){ + int class = pthread_priority_get_class_index(priority); + wq->wq_reqcount += 1; + wq->wq_requests[class] += 1; + wq->wq_kevent_ocrequests[class] += 1; + + PTHREAD_TRACE(TRACE_wq_req_kevent_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_kevent_ocrequests[class], 1, 0); + + WQ_TIMER_NEEDED(wq, start_timer); + } + goto done; + } + + // Having no overcommit requests, try to find any request that can start + // There's no TOCTTOU since we hold the workqueue lock + for (int i = 0; i < requests_count; i++){ + workq_reqthreads_req_t req = requests + i; + priority = req->priority; + + if (req->count == 0) + continue; + + int class = pthread_priority_get_class_index(priority); + + // Ask if we can start a new thread at the given class. 
Pass NUM_BUCKETS as + // my class to indicate we won't reuse this thread + if (may_start_constrained_thread(wq, class, WORKQUEUE_NUM_BUCKETS, NULL)){ + wq->wq_reqcount += 1; + wq->wq_requests[class] += 1; + wq->wq_kevent_requests[class] += 1; + + PTHREAD_TRACE(TRACE_wq_req_kevent_threads | DBG_FUNC_NONE, wq, priority, wq->wq_kevent_requests[class], 1, 0); + + // I can't make this call because it's not yet safe to make from + // scheduler callout context, so instead we'll just start up the timer + // which will spin up the thread when it files. + // workqueue_run_one(p, wq, FALSE, priority); + + WQ_TIMER_NEEDED(wq, start_timer); + + goto done; + } + } + + // Okay, here's the fun case: we can't spin up any of the non-overcommit threads + // that we've seen a request for, so we kick this over to the event manager thread + +event_manager: + PTHREAD_TRACE(TRACE_wq_req_event_manager | DBG_FUNC_NONE, wq, wq->wq_event_manager_priority, wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET], wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET], 0); + + if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){ + wq->wq_reqcount += 1; + wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1; + } + wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1; + + if (wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0) + WQ_TIMER_NEEDED(wq, start_timer); + +done: + workqueue_unlock(p); + + if (start_timer == TRUE) + workqueue_interval_timer_start(wq); + + PTHREAD_TRACE(TRACE_wq_kevent_req_threads | DBG_FUNC_END, wq, start_timer, 0, 0, 0); + + return THREAD_NULL; +} + + +static int wqops_thread_return(struct proc *p){ + thread_t th = current_thread(); + struct uthread *uth = pthread_kern->get_bsdthread_info(th); + struct threadlist *tl = util_get_thread_threadlist_entry(th); + + /* reset signal mask on the workqueue thread to default state */ + if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) { + pthread_kern->proc_lock(p); + pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask); + pthread_kern->proc_unlock(p); + } + + /* dropping WQ override counts has to be done outside the wq lock. */ + wq_thread_override_reset(th, THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD); + + workqueue_lock_spin(p); + + struct workqueue *wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p); + if (wq == NULL || !tl) { + workqueue_unlock(p); + + return EINVAL; + } + PTHREAD_TRACE(TRACE_wq_runitem | DBG_FUNC_END, wq, 0, 0, 0, 0); + + (void)workqueue_run_nextreq(p, wq, th, RUN_NEXTREQ_DEFAULT, 0); /* - * from here until we drop the workq lock - * we can't be pre-empted since we hold - * the lock in spin mode... this is important - * since we have to independently update the priority that - * the thread is associated with and the priorty based - * counters that "workqueue_callback" also changes and bases - * decisons on. 
+ * workqueue_run_nextreq is responsible for + * dropping the workqueue lock in all cases */ return 0; } /** * Multiplexed call to interact with the workqueue mechanism */ int _workq_kernreturn(struct proc *p, int options, __unused user_addr_t item, int arg2, int arg3, int32_t *retval) { int error = 0; if (pthread_kern->proc_get_register(p) == 0) { return EINVAL; } switch (options) { case WQOPS_QUEUE_NEWSPISUPP: { /* * arg2 = offset of serialno into dispatch queue * arg3 = kevent support */ int offset = arg2; if (arg3 & 0x01){ // If we get here, then userspace has indicated support for kevent delivery. } pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset); break; } case WQOPS_QUEUE_REQTHREADS: { /* * arg2 = number of threads to start * arg3 = priority */ error = wqops_queue_reqthreads(p, arg2, arg3); break; } case WQOPS_SET_EVENT_MANAGER_PRIORITY: { /* * arg2 = priority for the manager thread * * if _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG is set, the * ~_PTHREAD_PRIORITY_FLAGS_MASK contains a scheduling priority instead * of a QOS value */ pthread_priority_t pri = arg2; workqueue_lock_spin(p); struct workqueue *wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p); if (wq == NULL ) { workqueue_unlock(p); error = EINVAL; break; } if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){ // If userspace passes a scheduling priority, that takes precedence // over any QoS. (So, userspace should take care not to accidentally // lower the priority this way.)
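+				// Worked example (numbers illustrative): if the manager already
+				// runs at scheduling priority 37 and userspace now passes 31,
+				// the MAX() below keeps 37; the manager's priority can be
+				// ratcheted up through this call but never lowered.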
+ uint32_t sched_pri = pri & (~_PTHREAD_PRIORITY_FLAGS_MASK); + if (wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){ + wq->wq_event_manager_priority = MAX(sched_pri, wq->wq_event_manager_priority & (~_PTHREAD_PRIORITY_FLAGS_MASK)) + | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; + } else { + wq->wq_event_manager_priority = sched_pri + | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; + } + } else if ((wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){ + int cur_qos = pthread_priority_get_qos_class(wq->wq_event_manager_priority); + int new_qos = pthread_priority_get_qos_class(pri); + wq->wq_event_manager_priority = (uint32_t)pthread_qos_class_get_priority(MAX(cur_qos, new_qos)) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; + } + workqueue_unlock(p); + break; + } + case WQOPS_THREAD_KEVENT_RETURN: { + int32_t kevent_retval; + int ret = kevent_qos_internal(p, -1, item, arg2, item, arg2, NULL, NULL, KEVENT_FLAG_WORKQ | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS, &kevent_retval); + // We shouldn't be getting more errors out than events we put in, so + // reusing the input buffer should always provide enough space + assert(ret == KERN_SUCCESS && kevent_retval >= 0); + if (ret != KERN_SUCCESS){ + error = ret; + break; + } else if (kevent_retval > 0){ + assert(kevent_retval <= arg2); + *retval = kevent_retval; + error = 0; + break; } - assert(priclass < WORKQUEUE_NUM_BUCKETS); + } /* FALLTHROUGH */ + case WQOPS_THREAD_RETURN: { + error = wqops_thread_return(p); + // NOT REACHED except in case of error + assert(error); + break; + } + default: + error = EINVAL; + break; + } + return (error); +} - if (wq->wq_ocrequests[priclass] && (thread != THREAD_NULL || wq->wq_thidlecount)) { - /* - * handle delayed overcommit request... - * they have priority over normal requests - * within a given priority level - */ - wq->wq_reqcount--; - wq->wq_requests[priclass]--; - wq->wq_ocrequests[priclass]--; - oc_prio = pthread_priority_from_class_index(priclass); - overcommit = TRUE; +static boolean_t +workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority) +{ + boolean_t ran_one; - goto dispatch_overcommit; + if (wq->wq_thidlecount == 0) { + if (overcommit == FALSE) { + if (wq->wq_constrained_threads_scheduled < wq->wq_max_concurrency) + workqueue_addnewthread(wq, overcommit); + } else { + workqueue_addnewthread(wq, overcommit); + + if (wq->wq_thidlecount == 0) + return (FALSE); } } + ran_one = workqueue_run_nextreq(p, wq, THREAD_NULL, overcommit ? RUN_NEXTREQ_OVERCOMMIT : RUN_NEXTREQ_DEFAULT, priority); /* - * if we get here, the work should be handled by a constrained thread + * workqueue_run_nextreq is responsible for + * dropping the workqueue lock in all cases */ - if (wq->wq_reqcount == 0 || wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { - /* - * no work to do, or we're already at or over the scheduling limit for - * constrained threads... just return or park the thread... - * do not start the timer for this condition... if we don't have any work, - * we'll check again when new work arrives... if we're over the limit, we need 1 or more - * constrained threads to return to the kernel before we can dispatch additional work - */ - if ((th_to_park = thread) == THREAD_NULL) - goto out_of_work; - goto parkit; + workqueue_lock_spin(p); + + return (ran_one); +} + +/* + * this is a workqueue thread with no more + * work to do... 
park it for now + */ +static void +parkit(struct workqueue *wq, struct threadlist *tl, thread_t thread) +{ + uint32_t us_to_wait; + + TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry); + tl->th_flags &= ~TH_LIST_RUNNING; + + tl->th_flags |= TH_LIST_BLOCKED; + TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry); + + pthread_kern->thread_sched_call(thread, NULL); + + OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]); + wq->wq_thscheduled_count[tl->th_priority]--; + wq->wq_threads_scheduled--; + + if (tl->th_flags & TH_LIST_CONSTRAINED) { + wq->wq_constrained_threads_scheduled--; + wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT; + tl->th_flags &= ~TH_LIST_CONSTRAINED; } - thactive_count = 0; - busycount = 0; + if (wq->wq_thidlecount < 100) + us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100)); + else + us_to_wait = wq_reduce_pool_window_usecs / 100; + + wq->wq_thidlecount++; + wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT; + + assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE), + TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait, + wq_reduce_pool_window_usecs, NSEC_PER_USEC); + + PTHREAD_TRACE1(TRACE_wq_thread_park | DBG_FUNC_START, wq, wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, thread_tid(thread)); +} - curtime = mach_absolute_time(); +static boolean_t may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass, uint32_t my_priclass, boolean_t *start_timer){ + if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { + /* + * we need 1 or more constrained threads to return to the kernel before + * we can dispatch additional work + */ + return FALSE; + } - thactive_count += wq->wq_thactive_count[priclass]; + uint32_t busycount = 0; + uint32_t thactive_count = wq->wq_thactive_count[at_priclass]; - if (wq->wq_thscheduled_count[priclass]) { - if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[priclass])) { + // Has our most recently blocked thread blocked recently enough that we + // should still consider it busy? + // XXX should this be wq->wq_thscheduled_count[at_priclass] > thactive_count ? + if (wq->wq_thscheduled_count[at_priclass]) { + if (wq_thread_is_busy(mach_absolute_time(), &wq->wq_lastblocked_ts[at_priclass])) { busycount++; } } - if (thread != THREAD_NULL) { - if (tl->th_priority == priclass) { - /* - * dont't count this thread as currently active - */ - thactive_count--; - } + if (my_priclass < WORKQUEUE_NUM_BUCKETS && my_priclass == at_priclass){ + /* + * don't count this thread as currently active + */ + thactive_count--; } + if (thactive_count + busycount >= wq->wq_max_concurrency) { - if (busycount) { + if (busycount && start_timer) { /* * we found at least 1 thread in the * 'busy' state... make sure we start @@ -2012,16 +2331,182 @@ dispatch_overcommit: * to kick off the timer... we need to * start it now... */ - WQ_TIMER_NEEDED(wq, start_timer); + WQ_TIMER_NEEDED(wq, *start_timer); } - PTHREAD_TRACE(TRACE_wq_overcommitted|DBG_FUNC_NONE, wq, (start_timer ? 1<<7 : 0) | pthread_priority_from_class_index(priclass), thactive_count, busycount, 0); + PTHREAD_TRACE(TRACE_wq_overcommitted|DBG_FUNC_NONE, wq, (start_timer ?
1<<7 : 0) | pthread_priority_from_class_index(at_priclass), thactive_count, busycount, 0); - if ((th_to_park = thread) == THREAD_NULL) { - goto out_of_work; + return FALSE; + } + return TRUE; +} + +static struct threadlist *pop_from_thidlelist(struct workqueue *wq, uint32_t priclass, int *upcall_flags, int *wake_thread){ + struct threadlist *tl = TAILQ_FIRST(&wq->wq_thidlelist); + TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry); + wq->wq_thidlecount--; + + TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry); + + if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) { + tl->th_flags &= ~TH_LIST_SUSPENDED; + *upcall_flags &= ~WQ_FLAG_THREAD_REUSE; + + } else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) { + tl->th_flags &= ~TH_LIST_BLOCKED; + *wake_thread = 1; + } + tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY; + + wq->wq_threads_scheduled++; + wq->wq_thscheduled_count[priclass]++; + OSAddAtomic(1, &wq->wq_thactive_count[priclass]); + + return tl; +} + +static void +reset_to_priority(struct threadlist *tl, pthread_priority_t pri){ + kern_return_t ret; + thread_t th = tl->th_thread; + + if (tl->th_flags & TH_LIST_EVENT_MGR_SCHED_PRI){ + thread_precedence_policy_data_t precedinfo = { + .importance = 0 + }; + ret = pthread_kern->thread_policy_set_internal(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT); + assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); + tl->th_flags &= ~TH_LIST_EVENT_MGR_SCHED_PRI; + } + + thread_qos_policy_data_t qosinfo = { + .qos_tier = pthread_priority_get_qos_class(pri), + .tier_importance = 0 + }; + ret = pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qosinfo, THREAD_QOS_POLICY_COUNT); + assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); +} + +static void +reset_to_schedpri(struct threadlist *tl, pthread_priority_t pri){ + kern_return_t ret; + thread_t th = tl->th_thread; + + thread_qos_policy_data_t qosinfo = { + .qos_tier = THREAD_QOS_UNSPECIFIED, + .tier_importance = 0 + }; + ret = pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qosinfo, THREAD_QOS_POLICY_COUNT); + assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); + + thread_precedence_policy_data_t precedinfo = { + .importance = ((pri & (~_PTHREAD_PRIORITY_FLAGS_MASK)) - BASEPRI_DEFAULT) + }; + ret = pthread_kern->thread_policy_set_internal(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT); + assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED); + + tl->th_flags |= TH_LIST_EVENT_MGR_SCHED_PRI; +} + +/** + * grabs a thread for a request + * + * - called with the workqueue lock held... 
+ * - responsible for dropping it in all cases + * - if provided mode is for overcommit, doesn't consume a reqcount + * + */ +static boolean_t +workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t thread, + enum run_nextreq_mode mode, pthread_priority_t oc_prio) +{ + thread_t th_to_run = THREAD_NULL; + int wake_thread = 0; + int upcall_flags = WQ_FLAG_THREAD_REUSE; + uint32_t priclass; + struct threadlist *tl = NULL; + struct uthread *uth = NULL; + boolean_t start_timer = FALSE; + + // valid modes to call this function with + assert(mode == RUN_NEXTREQ_DEFAULT || mode == RUN_NEXTREQ_OVERCOMMIT || mode == RUN_NEXTREQ_UNCONSTRAINED); + // may only have a priority if in OVERCOMMIT mode + assert(mode == RUN_NEXTREQ_OVERCOMMIT || oc_prio == 0); + // thread == thread_null means "please spin up a new workqueue thread, we can't reuse this" + // thread != thread_null is thread reuse, and must be the current thread + assert(thread == THREAD_NULL || thread == current_thread()); + + PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_reqcount, 0); + + if (thread != THREAD_NULL) { + uth = pthread_kern->get_bsdthread_info(thread); + + if ((tl = pthread_kern->uthread_get_threadlist(uth)) == NULL) { + panic("wq thread with no threadlist"); } + } - goto parkit; + /* + * from here until we drop the workq lock + * we can't be pre-empted since we hold + * the lock in spin mode... this is important + * since we have to independently update the priority that + * the thread is associated with and the priority based + * counters that "workqueue_callback" also changes and bases + * decisions on. + */ + + if (mode == RUN_NEXTREQ_OVERCOMMIT) { + priclass = pthread_priority_get_class_index(oc_prio); + upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; + } else if (wq->wq_reqcount == 0){ + // no work to do. we'll check again when new work arrives.
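Before the goto below, a condensed, illustrative restatement of the chain of branches that starts here may make the selection order easier to follow. The field names are from this diff; the helper itself does not exist in the sources, and overcommit callers never reach this logic because their bucket is pinned by oc_prio:

	static int
	pick_request_bucket(struct workqueue *wq, boolean_t can_run_event_manager)
	{
		if (wq->wq_reqcount == 0)
			return -1;                              // nothing outstanding
		if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] && can_run_event_manager)
			return WORKQUEUE_EVENT_MANAGER_BUCKET;  // manager requests win
		for (uint32_t i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
			if (wq->wq_requests[i])
				return (int)i;                  // lowest index = highest QoS
		}
		return -1;      // only an unservable manager request remains
	}

Here can_run_event_manager stands for "no event manager is currently scheduled, or the calling thread is the manager being reused".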
+ goto done; + } else if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] && + ((wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0) || + (thread != THREAD_NULL && tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))){ + // There's an event manager request and either: + // - no event manager currently running + // - we are re-using the event manager + mode = RUN_NEXTREQ_EVENT_MANAGER; + priclass = WORKQUEUE_EVENT_MANAGER_BUCKET; + upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER; + if (wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET]) + upcall_flags |= WQ_FLAG_THREAD_KEVENT; + } else { + // Find highest priority and check for special request types + for (priclass = 0; priclass < WORKQUEUE_EVENT_MANAGER_BUCKET; priclass++) { + if (wq->wq_requests[priclass]) + break; + } + if (priclass == WORKQUEUE_EVENT_MANAGER_BUCKET){ + // the only remaining request must be the event manager's, since it isn't in a scanned bucket; + // we couldn't handle it above because an event manager is already running, + // so we fell through to this case + assert(wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] == 1 && + wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 1 && + wq->wq_reqcount == 1); + goto done; + } + + if (wq->wq_kevent_ocrequests[priclass]){ + mode = RUN_NEXTREQ_DEFERRED_OVERCOMMIT; + upcall_flags |= WQ_FLAG_THREAD_KEVENT; + upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; + } else if (wq->wq_ocrequests[priclass]){ + mode = RUN_NEXTREQ_DEFERRED_OVERCOMMIT; + upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; + } else if (wq->wq_kevent_requests[priclass]){ + upcall_flags |= WQ_FLAG_THREAD_KEVENT; + } + } + + if (mode == RUN_NEXTREQ_DEFAULT /* non-overcommit */){ + uint32_t my_priclass = (thread != THREAD_NULL) ? tl->th_priority : WORKQUEUE_NUM_BUCKETS; + if (may_start_constrained_thread(wq, priclass, my_priclass, &start_timer) == FALSE){ + // per policy, we won't start another constrained thread + goto done; + } } if (thread != THREAD_NULL) { @@ -2031,11 +2516,7 @@ dispatch_overcommit: * we pick up new work for this specific thread. */ th_to_run = thread; - goto pick_up_work; - } - -grab_idle_thread: - if (wq->wq_thidlecount == 0) { + } else if (wq->wq_thidlecount == 0) { /* * we have no additional threads waiting to pick up * work, however, there is additional work to do. @@ -2044,57 +2525,81 @@ grab_idle_thread: PTHREAD_TRACE(TRACE_wq_stalled, wq, wq->wq_nthreads, start_timer, 0, 0); - goto no_thread_to_run; + goto done; + } else { + // there is both work available and an idle thread, so activate a thread + tl = pop_from_thidlelist(wq, priclass, &upcall_flags, &wake_thread); + th_to_run = tl->th_thread; } - /* - * we already know there is both work available - * and an idle thread, so activate a thread and then - * fall into the code that pulls a new work request...
- */ - tl = TAILQ_FIRST(&wq->wq_thidlelist); - TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry); - wq->wq_thidlecount--; - - TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry); - - if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) { - tl->th_flags &= ~TH_LIST_SUSPENDED; - reuse_thread = 0; - - } else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) { - tl->th_flags &= ~TH_LIST_BLOCKED; - wake_thread = 1; - } - tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY; + // Adjust counters and thread flags AKA consume the request + // TODO: It would be lovely if OVERCOMMIT consumed reqcount + switch (mode) { + case RUN_NEXTREQ_DEFAULT: + case RUN_NEXTREQ_UNCONSTRAINED: + wq->wq_reqcount--; + wq->wq_requests[priclass]--; - wq->wq_threads_scheduled++; - wq->wq_thscheduled_count[priclass]++; - OSAddAtomic(1, &wq->wq_thactive_count[priclass]); + if (mode == RUN_NEXTREQ_DEFAULT){ + if (!(tl->th_flags & TH_LIST_CONSTRAINED)) { + wq->wq_constrained_threads_scheduled++; + tl->th_flags |= TH_LIST_CONSTRAINED; + } + } else if (mode == RUN_NEXTREQ_UNCONSTRAINED){ + if (tl->th_flags & TH_LIST_CONSTRAINED) { + // XXX: Why aren't we unsetting CONSTRAINED_THREAD_LIMIT here + wq->wq_constrained_threads_scheduled--; + tl->th_flags &= ~TH_LIST_CONSTRAINED; + } + } + if (upcall_flags & WQ_FLAG_THREAD_KEVENT){ + wq->wq_kevent_requests[priclass]--; + } + break; - adjust_counters = FALSE; - th_to_run = tl->th_thread; + case RUN_NEXTREQ_EVENT_MANAGER: + wq->wq_reqcount--; + wq->wq_requests[priclass]--; -pick_up_work: - if (!overcommit && !force_oc) { - wq->wq_reqcount--; - wq->wq_requests[priclass]--; + if (tl->th_flags & TH_LIST_CONSTRAINED) { + wq->wq_constrained_threads_scheduled--; + tl->th_flags &= ~TH_LIST_CONSTRAINED; + } + if (upcall_flags & WQ_FLAG_THREAD_KEVENT){ + wq->wq_kevent_requests[priclass]--; + } + break; - if ( !(tl->th_flags & TH_LIST_CONSTRAINED)) { - wq->wq_constrained_threads_scheduled++; - tl->th_flags |= TH_LIST_CONSTRAINED; - } - } else { - if (tl->th_flags & TH_LIST_CONSTRAINED) { - wq->wq_constrained_threads_scheduled--; - tl->th_flags &= ~TH_LIST_CONSTRAINED; - } + case RUN_NEXTREQ_DEFERRED_OVERCOMMIT: + wq->wq_reqcount--; + wq->wq_requests[priclass]--; + if (upcall_flags & WQ_FLAG_THREAD_KEVENT){ + wq->wq_kevent_ocrequests[priclass]--; + } else { + wq->wq_ocrequests[priclass]--; + } + /* FALLTHROUGH */ + case RUN_NEXTREQ_OVERCOMMIT: + if (tl->th_flags & TH_LIST_CONSTRAINED) { + wq->wq_constrained_threads_scheduled--; + tl->th_flags &= ~TH_LIST_CONSTRAINED; + } + break; } - orig_class = tl->th_priority; + // Confirm we've maintained our counter invariants + assert(wq->wq_requests[priclass] < UINT16_MAX); + assert(wq->wq_ocrequests[priclass] < UINT16_MAX); + assert(wq->wq_kevent_requests[priclass] < UINT16_MAX); + assert(wq->wq_kevent_ocrequests[priclass] < UINT16_MAX); + assert(wq->wq_ocrequests[priclass] + wq->wq_kevent_requests[priclass] + + wq->wq_kevent_ocrequests[priclass] <= + wq->wq_requests[priclass]); + + uint32_t orig_class = tl->th_priority; tl->th_priority = (uint8_t)priclass; - if (adjust_counters && (orig_class != priclass)) { + if ((thread != THREAD_NULL) && (orig_class != priclass)) { /* * we need to adjust these counters based on this * thread's new disposition w/r to priority @@ -2109,96 +2614,53 @@ pick_up_work: workqueue_unlock(p); - if (orig_class != priclass) { - pthread_priority_t pri = pthread_priority_from_class_index(priclass); - - thread_qos_policy_data_t qosinfo; - - /* Set the QoS tier on the thread, along with the ceiling of max importance for this 
class. */ - qosinfo.qos_tier = pthread_priority_get_qos_class(pri); - qosinfo.tier_importance = 0; - - PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_START, wq, thread_tid(tl->th_thread), pthread_priority_from_class_index(orig_class), 0, 0); - - /* All the previous implementation here now boils down to setting the QoS policy on the thread. */ - pthread_kern->thread_policy_set_internal(th_to_run, THREAD_QOS_POLICY, (thread_policy_t)&qosinfo, THREAD_QOS_POLICY_COUNT); + pthread_priority_t outgoing_priority; + if (mode == RUN_NEXTREQ_EVENT_MANAGER){ + outgoing_priority = wq->wq_event_manager_priority; + } else { + outgoing_priority = pthread_priority_from_class_index(priclass); + } - PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_END, wq, thread_tid(tl->th_thread), pthread_priority_from_class_index(priclass), qosinfo.qos_tier, 0); + PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_START, wq, thread_tid(tl->th_thread), outgoing_priority, 0, 0); + if (outgoing_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){ + reset_to_schedpri(tl, outgoing_priority & (~_PTHREAD_PRIORITY_FLAGS_MASK)); + } else if (orig_class != priclass) { + reset_to_priority(tl, outgoing_priority); } + PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_END, wq, thread_tid(tl->th_thread), outgoing_priority, 0, 0); /* * if current thread is reused for work request, does not return via unix_syscall */ - wq_runreq(p, overcommit, pthread_priority_from_class_index(priclass), th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run)); + wq_runreq(p, outgoing_priority, th_to_run, tl, upcall_flags, wake_thread, (thread == th_to_run)); - PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(th_to_run), overcommit, 1, 0); + PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(th_to_run), mode == RUN_NEXTREQ_OVERCOMMIT, 1, 0); return (TRUE); -out_of_work: - /* - * we have no work to do or we are fully booked - * w/r to running threads... - */ -no_thread_to_run: - workqueue_unlock(p); - - if (start_timer) - workqueue_interval_timer_start(wq); - - PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(thread), start_timer, 2, 0); - - return (FALSE); - -parkit: - /* - * this is a workqueue thread with no more - * work to do... 
park it for now - */ - TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry); - tl->th_flags &= ~TH_LIST_RUNNING; - - tl->th_flags |= TH_LIST_BLOCKED; - TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry); - - pthread_kern->thread_sched_call(th_to_park, NULL); - - OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]); - wq->wq_thscheduled_count[tl->th_priority]--; - wq->wq_threads_scheduled--; - - if (tl->th_flags & TH_LIST_CONSTRAINED) { - wq->wq_constrained_threads_scheduled--; - wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT; - tl->th_flags &= ~TH_LIST_CONSTRAINED; +done: + if (thread != THREAD_NULL){ + parkit(wq,tl,thread); } - if (wq->wq_thidlecount < 100) - us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100)); - else - us_to_wait = wq_reduce_pool_window_usecs / 100; - - wq->wq_thidlecount++; - wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT; - - assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE), - TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait, - wq_reduce_pool_window_usecs, NSEC_PER_USEC); workqueue_unlock(p); if (start_timer) workqueue_interval_timer_start(wq); - PTHREAD_TRACE1(TRACE_wq_thread_park | DBG_FUNC_START, wq, wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, thread_tid(th_to_park)); - PTHREAD_TRACE(TRACE_wq_run_nextitem | DBG_FUNC_END, wq, thread_tid(thread), 0, 3, 0); + PTHREAD_TRACE(TRACE_wq_run_nextitem | DBG_FUNC_END, wq, thread_tid(thread), start_timer, 3, 0); - thread_block((thread_continue_t)wq_unpark_continue); - /* NOT REACHED */ + if (thread != THREAD_NULL){ + thread_block((thread_continue_t)wq_unpark_continue); + /* NOT REACHED */ + } return (FALSE); } - +/** + * Called when a new thread is created + */ static void wq_unsuspend_continue(void) { @@ -2268,88 +2730,90 @@ normal_resume_to_user: pthread_kern->thread_bootstrap_return(); } - +/** + * parked thread wakes up + */ static void wq_unpark_continue(void) { - struct uthread *uth = NULL; + struct uthread *uth; struct threadlist *tl; - thread_t th_to_unpark; - proc_t p; - - th_to_unpark = current_thread(); - uth = pthread_kern->get_bsdthread_info(th_to_unpark); - if (uth != NULL) { - if ((tl = pthread_kern->uthread_get_threadlist(uth)) != NULL) { + thread_t th_to_unpark = current_thread(); - if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) { - /* - * a normal wakeup of this thread occurred... no need - * for any synchronization with the timer and wq_runreq - */ -normal_return_to_user: - pthread_kern->thread_sched_call(th_to_unpark, workqueue_callback); + if ((uth = pthread_kern->get_bsdthread_info(th_to_unpark)) == NULL) + goto done; + if ((tl = pthread_kern->uthread_get_threadlist(uth)) == NULL) + goto done; - PTHREAD_TRACE(0xefffd018 | DBG_FUNC_END, tl->th_workq, 0, 0, 0, 0); - - pthread_kern->thread_exception_return(); - } - p = current_proc(); + /* + * check if a normal wakeup of this thread occurred... if so, there's no need + * for any synchronization with the timer and wq_runreq so we just skip all this. 
+ */ + if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) != TH_LIST_RUNNING) { + proc_t p = current_proc(); - workqueue_lock_spin(p); + workqueue_lock_spin(p); - if ( !(tl->th_flags & TH_LIST_RUNNING)) { - /* - * the timer popped us out and we've not - * been moved off of the idle list - * so we should now self-destruct - * - * workqueue_removethread consumes the lock - */ - workqueue_removethread(tl, 0); - pthread_kern->thread_exception_return(); - } + if ( !(tl->th_flags & TH_LIST_RUNNING)) { /* - * the timer woke us up, but we have already - * started to make this a runnable thread, - * but have not yet finished that process... - * so wait for the normal wakeup + * the timer popped us out and we've not + * been moved off of the idle list + * so we should now self-destruct + * + * workqueue_removethread consumes the lock */ - while ((tl->th_flags & TH_LIST_BUSY)) { - - assert_wait((caddr_t)tl, (THREAD_UNINT)); + workqueue_removethread(tl, 0); + pthread_kern->unix_syscall_return(0); + } - workqueue_unlock(p); + /* + * the timer woke us up, but we have already + * started to make this a runnable thread, + * but have not yet finished that process... + * so wait for the normal wakeup + */ + while ((tl->th_flags & TH_LIST_BUSY)) { - thread_block(THREAD_CONTINUE_NULL); + assert_wait((caddr_t)tl, (THREAD_UNINT)); - workqueue_lock_spin(p); - } - /* - * we have finished setting up the thread's context - * now we can return as if we got a normal wakeup - */ workqueue_unlock(p); - goto normal_return_to_user; + thread_block(THREAD_CONTINUE_NULL); + + workqueue_lock_spin(p); } + + /* + * we have finished setting up the thread's context + * now we can return as if we got a normal wakeup + */ + workqueue_unlock(p); } + + pthread_kern->thread_sched_call(th_to_unpark, workqueue_callback); + + // FIXME: What's this? 
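(The FIXME above refers to the unlabeled trace point that follows this aside.) For readers unfamiliar with the Mach primitives, the busy-wait a few lines up is the standard assert_wait/thread_block idiom; restated here with annotations, with no behavior added:

	while (tl->th_flags & TH_LIST_BUSY) {
		assert_wait((caddr_t)tl, (THREAD_UNINT));  // arm the wait on the event address
		                                           // before dropping the lock, so a wakeup
		                                           // issued in the gap below is not lost
		workqueue_unlock(p);                       // let the waker finish setting us up
		thread_block(THREAD_CONTINUE_NULL);        // sleep until thread_wakeup(tl)
		workqueue_lock_spin(p);                    // re-check the flag under the lock
	}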
+ PTHREAD_TRACE(0xefffd018 | DBG_FUNC_END, tl->th_workq, 0, 0, 0, 0); + +done: + + // XXX should be using unix_syscall_return(EJUSTRETURN) pthread_kern->thread_exception_return(); } static void -wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t th, struct threadlist *tl, - int reuse_thread, int wake_thread, int return_directly) +wq_runreq(proc_t p, pthread_priority_t priority, thread_t th, struct threadlist *tl, + int flags, int wake_thread, int return_directly) { int ret = 0; boolean_t need_resume = FALSE; - PTHREAD_TRACE1(TRACE_wq_runitem | DBG_FUNC_START, tl->th_workq, overcommit, priority, thread_tid(current_thread()), thread_tid(th)); + PTHREAD_TRACE1(TRACE_wq_runitem | DBG_FUNC_START, tl->th_workq, flags, priority, thread_tid(current_thread()), thread_tid(th)); - ret = _setup_wqthread(p, th, overcommit, priority, reuse_thread, tl); + ret = _setup_wqthread(p, th, priority, flags, tl); if (ret != 0) panic("setup_wqthread failed %x\n", ret); @@ -2357,6 +2821,7 @@ wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t if (return_directly) { PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0); + // XXX should be using unix_syscall_return(EJUSTRETURN) pthread_kern->thread_exception_return(); panic("wq_runreq: thread_exception_return returned ...\n"); } @@ -2368,7 +2833,7 @@ wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t workqueue_unlock(p); } else { - PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th)); + PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th)); workqueue_lock_spin(p); @@ -2392,65 +2857,126 @@ wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t } } +#define KEVENT_LIST_LEN 16 +#define KEVENT_DATA_SIZE (32 * 1024) +/** + * configures initial thread stack/registers to jump into: + * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int flags, int nkevents); + * to get there we jump through assembly stubs in pthread_asm.s. Those + * routines set up a stack frame, using the current stack pointer, and marshal + * arguments from registers to the stack as required by the ABI. + * + * One odd thing we do here is to start the pthread_t 4k below what would be the + * top of the stack otherwise. This is because usually only the first 4k of the + * pthread_t will be used and so we want to put it on the same 16k page as the + * top of the stack to save memory.
+ * + * When we are done the stack will look like: + * |-----------| th_stackaddr + th_allocsize + * |pthread_t | th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_STACK_OFFSET + * |kevent list| optionally - at most KEVENT_LIST_LEN events + * |kevent data| optionally - at most KEVENT_DATA_SIZE bytes + * |stack gap | bottom aligned to 16 bytes, and at least as big as stack_gap_min + * | STACK | + * | ⇓ | + * | | + * |guard page | guardsize + * |-----------| th_stackaddr + */ int -_setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t priority, int reuse_thread, struct threadlist *tl) +_setup_wqthread(proc_t p, thread_t th, pthread_priority_t priority, int flags, struct threadlist *tl) { - uint32_t flags = reuse_thread | WQ_FLAG_THREAD_NEWSPI; - mach_vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map); int error = 0; - if (overcommit) { - flags |= WQ_FLAG_THREAD_OVERCOMMIT; - } + const vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map); + const vm_size_t stack_gap_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_REDZONE_LEN; + const vm_size_t stack_align_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_STK_ALIGN; + + user_addr_t pthread_self_addr = (user_addr_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET); + user_addr_t stack_top_addr = (user_addr_t)((pthread_self_addr - stack_gap_min) & -stack_align_min); + user_addr_t stack_bottom_addr = (user_addr_t)(tl->th_stackaddr + guardsize); /* Put the QoS class value into the lower bits of the reuse_thread register, this is where * the thread priority used to be stored anyway. */ flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK); -#if defined(__i386__) || defined(__x86_64__) - int isLP64 = proc_is64bit(p); + flags |= WQ_FLAG_THREAD_NEWSPI; - /* - * Set up i386 registers & function call. 
- */ - if (isLP64 == 0) { - x86_thread_state32_t state; - x86_thread_state32_t *ts = &state; - - ts->eip = (unsigned int)pthread_kern->proc_get_wqthread(p); - ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize); - ts->ebx = (unsigned int)tl->th_thport; - ts->ecx = (unsigned int)(tl->th_stackaddr + guardsize); - ts->edx = (unsigned int)0; - ts->edi = (unsigned int)flags; - ts->esi = (unsigned int)0; - /* - * set stack pointer - */ - ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize) - C_32_STK_ALIGN)); + user_addr_t kevent_list = NULL; + int kevent_count = 0; + if (flags & WQ_FLAG_THREAD_KEVENT){ + kevent_list = pthread_self_addr - KEVENT_LIST_LEN * sizeof(struct kevent_qos_s); + kevent_count = KEVENT_LIST_LEN; - (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts); + user_addr_t kevent_data_buf = kevent_list - KEVENT_DATA_SIZE; + user_size_t kevent_data_available = KEVENT_DATA_SIZE; - } else { - x86_thread_state64_t state64; - x86_thread_state64_t *ts64 = &state64; + int32_t events_out = 0; - ts64->rip = (uint64_t)pthread_kern->proc_get_wqthread(p); - ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize); - ts64->rsi = (uint64_t)(tl->th_thport); - ts64->rdx = (uint64_t)(tl->th_stackaddr + guardsize); - ts64->rcx = (uint64_t)0; - ts64->r8 = (uint64_t)flags; - ts64->r9 = (uint64_t)0; + int ret = kevent_qos_internal(p, -1, NULL, 0, kevent_list, kevent_count, + kevent_data_buf, &kevent_data_available, + KEVENT_FLAG_WORKQ | KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_STACK_EVENTS | KEVENT_FLAG_IMMEDIATE, + &events_out); - /* - * set stack pointer aligned to 16 byte boundary - */ - ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize) - C_64_REDZONE_LEN); + // squash any errors into just empty output on non-debug builds + assert(ret == KERN_SUCCESS && events_out != -1); + if (ret != KERN_SUCCESS || events_out == -1){ + events_out = 0; + kevent_data_available = KEVENT_DATA_SIZE; + } + + // We shouldn't get data out if there aren't events available + assert(events_out != 0 || kevent_data_available == KEVENT_DATA_SIZE); + + if (events_out >= 0){ + kevent_count = events_out; + kevent_list = pthread_self_addr - kevent_count * sizeof(struct kevent_qos_s); + + if (kevent_data_available == KEVENT_DATA_SIZE){ + stack_top_addr = (kevent_list - stack_gap_min) & -stack_align_min; + } else { + stack_top_addr = (kevent_data_buf + kevent_data_available - stack_gap_min) & -stack_align_min; + } + } else { + kevent_list = NULL; + kevent_count = 0; + } + } + +#if defined(__i386__) || defined(__x86_64__) + int isLP64 = proc_is64bit(p); - error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)ts64); + if (isLP64 == 0) { + x86_thread_state32_t state = { + .eip = (unsigned int)pthread_kern->proc_get_wqthread(p), + .eax = /* arg0 */ (unsigned int)pthread_self_addr, + .ebx = /* arg1 */ (unsigned int)tl->th_thport, + .ecx = /* arg2 */ (unsigned int)stack_bottom_addr, + .edx = /* arg3 */ (unsigned int)kevent_list, + .edi = /* arg4 */ (unsigned int)flags, + .esi = /* arg5 */ (unsigned int)kevent_count, + + .esp = (int)((vm_offset_t)stack_top_addr), + }; + + (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state); + } else { + x86_thread_state64_t state64 = { + // x86-64 already passes all the arguments in registers, so we just put them in their final place here + .rip = (uint64_t)pthread_kern->proc_get_wqthread(p), + .rdi = (uint64_t)pthread_self_addr, + .rsi = (uint64_t)tl->th_thport, + 
.rdx = (uint64_t)stack_bottom_addr, + .rcx = (uint64_t)kevent_list, + .r8 = (uint64_t)flags, + .r9 = (uint64_t)kevent_count, + + .rsp = (uint64_t)(stack_top_addr) + }; + + error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64); if (error != KERN_SUCCESS) { error = EINVAL; } @@ -2462,6 +2988,27 @@ _setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t return error; } +#if DEBUG +static int wq_kevent_test SYSCTL_HANDLER_ARGS { + //(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) +#pragma unused(oidp, arg1, arg2) + int error; + struct workq_reqthreads_req_s requests[64] = {}; + + if (req->newlen > sizeof(requests) || req->newlen < sizeof(struct workq_reqthreads_req_s)) + return EINVAL; + + error = copyin(req->newptr, requests, req->newlen); + if (error) return error; + + _workq_reqthreads(req->p, (int)(req->newlen / sizeof(struct workq_reqthreads_req_s)), requests); + + return 0; +} +#endif // DEBUG + +#pragma mark - Misc + int _fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo) { @@ -2535,4 +3082,12 @@ _pthread_init(void) sysctl_register_oid(&sysctl__kern_wq_max_threads); sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads); sysctl_register_oid(&sysctl__kern_pthread_debug_tracing); + +#if DEBUG + sysctl_register_oid(&sysctl__kern_wq_max_concurrency); + sysctl_register_oid(&sysctl__debug_wq_kevent_test); +#endif + + wq_max_concurrency = pthread_kern->ml_get_max_cpus(); + } diff --git a/kern/kern_synch.c b/kern/kern_synch.c index cd698ec..7136912 100644 --- a/kern/kern_synch.c +++ b/kern/kern_synch.c @@ -68,7 +68,6 @@ #include #include #include -#include //#include #include #include diff --git a/kern/kern_trace.h b/kern/kern_trace.h index 701fa70..7904ac5 100644 --- a/kern/kern_trace.h +++ b/kern/kern_trace.h @@ -101,6 +101,14 @@ TRACE_CODE(wq_override_start, _TRACE_SUB_WORKQUEUE, 0x12); TRACE_CODE(wq_override_end, _TRACE_SUB_WORKQUEUE, 0x13); TRACE_CODE(wq_override_dispatch, _TRACE_SUB_WORKQUEUE, 0x14); TRACE_CODE(wq_override_reset, _TRACE_SUB_WORKQUEUE, 0x15); +TRACE_CODE(wq_req_event_manager, _TRACE_SUB_WORKQUEUE, 0x16); +TRACE_CODE(wq_kevent_req_threads, _TRACE_SUB_WORKQUEUE, 0x17); +TRACE_CODE(wq_req_kevent_threads, _TRACE_SUB_WORKQUEUE, 0x18); +TRACE_CODE(wq_req_kevent_octhreads, _TRACE_SUB_WORKQUEUE, 0x19); +TRACE_CODE(wq_thread_limit_exceeded, _TRACE_SUB_WORKQUEUE, 0x1a); +TRACE_CODE(wq_thread_constrained_maxed, _TRACE_SUB_WORKQUEUE, 0x1b); +TRACE_CODE(wq_thread_add_during_exit, _TRACE_SUB_WORKQUEUE, 0x1c); +TRACE_CODE(wq_thread_create_failed, _TRACE_SUB_WORKQUEUE, 0x1d); // synch trace points TRACE_CODE(psynch_mutex_ulock, _TRACE_SUB_MUTEX, 0x0); diff --git a/kern/workqueue_internal.h b/kern/workqueue_internal.h index cad0b50..4b8d721 100644 --- a/kern/workqueue_internal.h +++ b/kern/workqueue_internal.h @@ -34,10 +34,12 @@ */ /* workq_kernreturn commands */ -#define WQOPS_THREAD_RETURN 4 -#define WQOPS_QUEUE_NEWSPISUPP 0x10 /* this is to check for newer SPI support */ -#define WQOPS_QUEUE_REQTHREADS 0x20 /* request number of threads of a prio */ -#define WQOPS_QUEUE_REQTHREADS2 0x30 /* request a number of threads in a given priority bucket */ +#define WQOPS_THREAD_RETURN 0x04 /* parks the thread back into the kernel */ +#define WQOPS_QUEUE_NEWSPISUPP 0x10 /* this is to check for newer SPI support */ +#define WQOPS_QUEUE_REQTHREADS 0x20 /* request number of threads of a prio */ +#define WQOPS_QUEUE_REQTHREADS2 0x30 /* request a number of threads in a given priority bucket */ +#define 
WQOPS_THREAD_KEVENT_RETURN 0x40 /* parks the thread after delivering the passed kevent array */ +#define WQOPS_SET_EVENT_MANAGER_PRIORITY 0x80 /* max() the provided priority into the priority of the event manager */ /* flag values for reuse field in the libc side _pthread_wqthread */ #define WQ_FLAG_THREAD_PRIOMASK 0x0000ffff @@ -45,6 +47,8 @@ #define WQ_FLAG_THREAD_OVERCOMMIT 0x00010000 /* thread is with overcommit prio */ #define WQ_FLAG_THREAD_REUSE 0x00020000 /* thread is being reused */ #define WQ_FLAG_THREAD_NEWSPI 0x00040000 /* the call is with new SPIs */ +#define WQ_FLAG_THREAD_KEVENT 0x00080000 /* thread is response to kevent req */ +#define WQ_FLAG_THREAD_EVENT_MANAGER 0x00100000 /* event manager thread */ /* These definitions are only available to the kext, to avoid bleeding constants and types across the boundary to * the userspace library. @@ -71,7 +75,11 @@ enum { #define WORKQUEUE_LOW_PRIOQUEUE 2 /* low priority queue */ #define WORKQUEUE_BG_PRIOQUEUE 3 /* background priority queue */ -#define WORKQUEUE_NUM_BUCKETS 6 +#define WORKQUEUE_NUM_BUCKETS 7 + +// Sometimes something gets passed a bucket number and we need a way to express +// that it's actually the event manager. Use the (n+1)th bucket for that. +#define WORKQUEUE_EVENT_MANAGER_BUCKET (WORKQUEUE_NUM_BUCKETS-1) /* wq_max_constrained_threads = max(64, N_CPU * WORKQUEUE_CONSTRAINED_FACTOR) * This used to be WORKQUEUE_NUM_BUCKETS + 1 when NUM_BUCKETS was 4, yielding @@ -101,6 +109,7 @@ struct threadlist { #define TH_LIST_BUSY 0x10 #define TH_LIST_NEED_WAKEUP 0x20 #define TH_LIST_CONSTRAINED 0x40 +#define TH_LIST_EVENT_MGR_SCHED_PRI 0x80 struct workqueue { @@ -108,8 +117,8 @@ struct workqueue { vm_map_t wq_map; task_t wq_task; thread_call_t wq_atimer_call; - int wq_flags; - int wq_lflags; + int wq_flags; // updated atomically + int wq_lflags; // protected by workqueue lock uint64_t wq_thread_yielded_timestamp; uint32_t wq_thread_yielded_count; uint32_t wq_timer_interval; @@ -118,15 +127,28 @@ struct workqueue { uint32_t wq_constrained_threads_scheduled; uint32_t wq_nthreads; uint32_t wq_thidlecount; - uint32_t wq_reqcount; TAILQ_HEAD(, threadlist) wq_thrunlist; TAILQ_HEAD(, threadlist) wq_thidlelist; + + /* Counters for how many requests we have outstanding. The invariants here: + * - reqcount == SUM(requests) + (event manager ? 1 : 0) + * - SUM(ocrequests) + SUM(kevent_requests) + SUM(kevent_ocrequests) <= SUM(requests) + * - # of constrained requests is difference between quantities above + * i.e. a kevent+overcommit request will increment reqcount, requests and + * kevent_ocrequests only. + */ + uint32_t wq_reqcount; uint16_t wq_requests[WORKQUEUE_NUM_BUCKETS]; uint16_t wq_ocrequests[WORKQUEUE_NUM_BUCKETS]; + uint16_t wq_kevent_requests[WORKQUEUE_NUM_BUCKETS]; + uint16_t wq_kevent_ocrequests[WORKQUEUE_NUM_BUCKETS]; + uint16_t wq_reqconc[WORKQUEUE_NUM_BUCKETS]; /* requested concurrency for each priority level */ uint16_t wq_thscheduled_count[WORKQUEUE_NUM_BUCKETS]; uint32_t wq_thactive_count[WORKQUEUE_NUM_BUCKETS] __attribute__((aligned(4))); /* must be uint32_t since we OSAddAtomic on these */ - uint64_t wq_lastblocked_ts[WORKQUEUE_NUM_BUCKETS] __attribute__((aligned(8))); + uint64_t wq_lastblocked_ts[WORKQUEUE_NUM_BUCKETS] __attribute__((aligned(8))); /* XXX: why per bucket?
*/ + + uint32_t wq_event_manager_priority; }; #define WQ_LIST_INITED 0x01 #define WQ_ATIMER_RUNNING 0x02 @@ -137,7 +159,6 @@ struct workqueue { #define WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT 0x04 #define WQL_EXCEEDED_TOTAL_THREAD_LIMIT 0x08 - #define WQ_VECT_SET_BIT(vector, bit) \ vector[(bit) / 32] |= (1 << ((bit) % 32)) diff --git a/libpthread.xcodeproj/project.pbxproj b/libpthread.xcodeproj/project.pbxproj index 966894b..ca9263c 100644 --- a/libpthread.xcodeproj/project.pbxproj +++ b/libpthread.xcodeproj/project.pbxproj @@ -13,6 +13,7 @@ buildPhases = ( ); dependencies = ( + 6E8C16821B14F11800C8987C /* PBXTargetDependency */, C90E7AB015DC3D3D00A06D48 /* PBXTargetDependency */, C90E7AB215DC3D3D00A06D48 /* PBXTargetDependency */, ); @@ -36,6 +37,7 @@ buildPhases = ( ); dependencies = ( + 6E8C16841B14F11B00C8987C /* PBXTargetDependency */, C98832C615DEB44B00B3308E /* PBXTargetDependency */, C98832C815DEB44B00B3308E /* PBXTargetDependency */, 74E594AB1613AD7F006C417B /* PBXTargetDependency */, @@ -47,6 +49,47 @@ /* End PBXAggregateTarget section */ /* Begin PBXBuildFile section */ + 6E8C16541B14F08A00C8987C /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EB232C91B0EB29D005915CE /* resolver.c */; }; + 6E8C16551B14F08A00C8987C /* pthread.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325FA15B7513200270056 /* pthread.c */; }; + 6E8C16561B14F08A00C8987C /* pthread_cancelable.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325F115B7513200270056 /* pthread_cancelable.c */; }; + 6E8C16571B14F08A00C8987C /* plockstat.d in Sources */ = {isa = PBXBuildFile; fileRef = C9A325EF15B7513200270056 /* plockstat.d */; }; + 6E8C16581B14F08A00C8987C /* pthread_cond.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325F215B7513200270056 /* pthread_cond.c */; }; + 6E8C16591B14F08A00C8987C /* pthread_mutex.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325F515B7513200270056 /* pthread_mutex.c */; }; + 6E8C165A1B14F08A00C8987C /* pthread_mutex_up.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EB232C81B0EB29D005915CE /* pthread_mutex_up.c */; }; + 6E8C165B1B14F08A00C8987C /* qos.c in Sources */ = {isa = PBXBuildFile; fileRef = C9244C1C1860D8EF00075748 /* qos.c */; }; + 6E8C165C1B14F08A00C8987C /* pthread_rwlock.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325F615B7513200270056 /* pthread_rwlock.c */; }; + 6E8C165D1B14F08A00C8987C /* pthread_tsd.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325F815B7513200270056 /* pthread_tsd.c */; }; + 6E8C165E1B14F08A00C8987C /* pthread_cancelable_cancel.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A1BF5215C9A9F5006BB313 /* pthread_cancelable_cancel.c */; }; + 6E8C165F1B14F08A00C8987C /* pthread_cancelable_legacy.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A1BF5415C9CB9D006BB313 /* pthread_cancelable_legacy.c */; }; + 6E8C16601B14F08A00C8987C /* pthread_cond_legacy.c in Sources */ = {isa = PBXBuildFile; fileRef = C975D5D615C9CECA0098ECD8 /* pthread_cond_legacy.c */; }; + 6E8C16611B14F08A00C8987C /* pthread_mutex_legacy.c in Sources */ = {isa = PBXBuildFile; fileRef = C975D5D815C9CEEA0098ECD8 /* pthread_mutex_legacy.c */; }; + 6E8C16621B14F08A00C8987C /* pthread_rwlock_legacy.c in Sources */ = {isa = PBXBuildFile; fileRef = C975D5DA15C9CEFA0098ECD8 /* pthread_rwlock_legacy.c */; }; + 6E8C16631B14F08A00C8987C /* pthread_support.c in Sources */ = {isa = PBXBuildFile; fileRef = C975D5DC15C9D16B0098ECD8 /* pthread_support.c */; }; + 6E8C16641B14F08A00C8987C /* thread_setup.c in Sources */ = {isa = PBXBuildFile; fileRef = 
C9A325FC15B7513200270056 /* thread_setup.c */; }; + 6E8C16651B14F08A00C8987C /* pthread_atfork.c in Sources */ = {isa = PBXBuildFile; fileRef = C90E7AB415DC40D900A06D48 /* pthread_atfork.c */; }; + 6E8C16661B14F08A00C8987C /* pthread_asm.s in Sources */ = {isa = PBXBuildFile; fileRef = C99AD87D15DF04D10009A6F8 /* pthread_asm.s */; }; + 6E8C16691B14F08A00C8987C /* qos.h in Headers */ = {isa = PBXBuildFile; fileRef = C9244C1A185FCFED00075748 /* qos.h */; }; + 6E8C166A1B14F08A00C8987C /* pthread.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325FE15B7513700270056 /* pthread.h */; }; + 6E8C166B1B14F08A00C8987C /* pthread_impl.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325FF15B7513700270056 /* pthread_impl.h */; }; + 6E8C166C1B14F08A00C8987C /* qos.h in Headers */ = {isa = PBXBuildFile; fileRef = E4063CF21906B4FB000202F9 /* qos.h */; }; + 6E8C166D1B14F08A00C8987C /* pthread_spis.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A3260015B7513700270056 /* pthread_spis.h */; }; + 6E8C166E1B14F08A00C8987C /* sched.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A3260115B7513700270056 /* sched.h */; }; + 6E8C166F1B14F08A00C8987C /* introspection_private.h in Headers */ = {isa = PBXBuildFile; fileRef = E4657D4017284F7B007D1847 /* introspection_private.h */; }; + 6E8C16701B14F08A00C8987C /* tsd_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325F415B7513200270056 /* tsd_private.h */; }; + 6E8C16711B14F08A00C8987C /* posix_sched.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325F015B7513200270056 /* posix_sched.h */; }; + 6E8C16721B14F08A00C8987C /* atomic_llsc.h in Headers */ = {isa = PBXBuildFile; fileRef = E473BE1719AC305A009C5A52 /* atomic_llsc.h */; }; + 6E8C16731B14F08A00C8987C /* qos_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C99B17DA189C2E1B00991D38 /* qos_private.h */; }; + 6E8C16741B14F08A00C8987C /* atomic.h in Headers */ = {isa = PBXBuildFile; fileRef = E473BE1819AC305A009C5A52 /* atomic.h */; }; + 6E8C16751B14F08A00C8987C /* spawn.h in Headers */ = {isa = PBXBuildFile; fileRef = C98C95D818FF1F4E005654FB /* spawn.h */; }; + 6E8C16761B14F08A00C8987C /* spinlock_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325F715B7513200270056 /* spinlock_private.h */; }; + 6E8C16771B14F08A00C8987C /* workqueue_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C9A325F915B7513200270056 /* workqueue_private.h */; }; + 6E8C16781B14F08A00C8987C /* private.h in Headers */ = {isa = PBXBuildFile; fileRef = C9153095167ACC22006BB094 /* private.h */; }; + 6EB232CB1B0EB2E2005915CE /* pthread_mutex_up.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EB232C81B0EB29D005915CE /* pthread_mutex_up.c */; }; + 6EB232CC1B0EB2F0005915CE /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EB232C91B0EB29D005915CE /* resolver.c */; }; + 6EB232CD1B0EB318005915CE /* pthread_mutex_up.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EB232C81B0EB29D005915CE /* pthread_mutex_up.c */; }; + 6EB232CE1B0EB31B005915CE /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EB232C91B0EB29D005915CE /* resolver.c */; }; + 6EB232CF1B0EB321005915CE /* pthread_mutex_up.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EB232C81B0EB29D005915CE /* pthread_mutex_up.c */; }; + 6EB232D01B0EB325005915CE /* resolver.c in Sources */ = {isa = PBXBuildFile; fileRef = 6EB232C91B0EB29D005915CE /* resolver.c */; }; 74E594931613AAF4006C417B /* pthread.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325FA15B7513200270056 /* pthread.c */; }; 74E594941613AAF4006C417B /* 
pthread_cancelable.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325F115B7513200270056 /* pthread_cancelable.c */; }; 74E594951613AAF4006C417B /* pthread_cond.c in Sources */ = {isa = PBXBuildFile; fileRef = C9A325F215B7513200270056 /* pthread_cond.c */; }; @@ -104,10 +147,25 @@ C9D75E4216127B3900C2FB26 /* kern_synch.c in Sources */ = {isa = PBXBuildFile; fileRef = C9169DDB1603DE84005A2F8C /* kern_synch.c */; }; E4063CF31906B75A000202F9 /* qos.h in Headers */ = {isa = PBXBuildFile; fileRef = E4063CF21906B4FB000202F9 /* qos.h */; settings = {ATTRIBUTES = (Private, ); }; }; E4657D4117284F7B007D1847 /* introspection_private.h in Headers */ = {isa = PBXBuildFile; fileRef = E4657D4017284F7B007D1847 /* introspection_private.h */; settings = {ATTRIBUTES = (Private, ); }; }; - FC4DBBB316DEA8DA00503E47 /* plockstat.d in Sources */ = {isa = PBXBuildFile; fileRef = C9A325EF15B7513200270056 /* plockstat.d */; }; + E473BE1919AC305A009C5A52 /* atomic_llsc.h in Headers */ = {isa = PBXBuildFile; fileRef = E473BE1719AC305A009C5A52 /* atomic_llsc.h */; }; + E473BE1A19AC305A009C5A52 /* atomic.h in Headers */ = {isa = PBXBuildFile; fileRef = E473BE1819AC305A009C5A52 /* atomic.h */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ + 6E8C16811B14F11800C8987C /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = C9A325D915B7347000270056 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 6E8C16511B14F08A00C8987C; + remoteInfo = "libsystem_pthread.dylib introspection"; + }; + 6E8C16831B14F11B00C8987C /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = C9A325D915B7347000270056 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 6E8C16511B14F08A00C8987C; + remoteInfo = "libsystem_pthread.dylib introspection"; + }; 74E594AA1613AD7F006C417B /* PBXContainerItemProxy */ = { isa = PBXContainerItemProxy; containerPortal = C9A325D915B7347000270056 /* Project object */; @@ -160,7 +218,30 @@ /* End PBXContainerItemProxy section */ /* Begin PBXFileReference section */ + 6E8C16801B14F08A00C8987C /* libsystem_pthread.dylib */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.dylib"; includeInIndex = 0; path = libsystem_pthread.dylib; sourceTree = BUILT_PRODUCTS_DIR; }; + 6E8C16851B14F14000C8987C /* pthread_introspection.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = pthread_introspection.xcconfig; sourceTree = ""; }; + 6EB232C81B0EB29D005915CE /* pthread_mutex_up.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pthread_mutex_up.c; sourceTree = ""; }; + 6EB232C91B0EB29D005915CE /* resolver.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = resolver.c; sourceTree = ""; }; + 6EB232CA1B0EB29D005915CE /* resolver.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = resolver.h; sourceTree = ""; }; 74E594A41613AAF4006C417B /* libpthread_eOS.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libpthread_eOS.a; sourceTree = BUILT_PRODUCTS_DIR; }; + 9207EB711AA6E008006FFC86 /* wq_kevent_stress.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = wq_kevent_stress.c; sourceTree = ""; }; + 9240BF321AA669C4003C99B4 /* wqtrace.lua */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = wqtrace.lua; path = tools/wqtrace.lua; sourceTree = SOURCE_ROOT; }; + 
9264D6831A9D3E010094346B /* atfork.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = atfork.c; sourceTree = ""; }; + 9264D6841A9D3E010094346B /* cond.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = cond.c; sourceTree = ""; }; + 9264D6851A9D3E010094346B /* cond_timed.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = cond_timed.c; sourceTree = ""; }; + 9264D6861A9D3E010094346B /* custom_stack.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = custom_stack.c; sourceTree = ""; }; + 9264D6871A9D3E010094346B /* join.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = join.c; sourceTree = ""; }; + 9264D6881A9D3E010094346B /* Makefile */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.make; path = Makefile; sourceTree = ""; }; + 9264D6891A9D3E010094346B /* maxwidth.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = maxwidth.c; sourceTree = ""; }; + 9264D68A1A9D3E010094346B /* mutex.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = mutex.c; sourceTree = ""; }; + 9264D68B1A9D3E010094346B /* once.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = once.c; sourceTree = ""; }; + 9264D68C1A9D3E010094346B /* rwlock-signal.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = "rwlock-signal.c"; sourceTree = ""; }; + 9264D68D1A9D3E010094346B /* rwlock.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = rwlock.c; sourceTree = ""; }; + 9264D68E1A9D3E010094346B /* tsd.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = tsd.c; sourceTree = ""; }; + 9264D68F1A9D3E010094346B /* wq_block_handoff.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = wq_block_handoff.c; sourceTree = ""; }; + 9264D6901A9D3E010094346B /* wq_kevent.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = wq_kevent.c; sourceTree = ""; }; + 92C577E11A378A85004AF98B /* kext_debug.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = kext_debug.xcconfig; sourceTree = ""; }; + 92C577EA1A378C9C004AF98B /* pthread_debug.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = pthread_debug.xcconfig; sourceTree = ""; }; A98FE72D19479F7C007718DA /* qos_private.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = qos_private.h; sourceTree = ""; }; C90E7A9F15DC3C3800A06D48 /* libpthread.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libpthread.a; sourceTree = BUILT_PRODUCTS_DIR; }; C90E7AB415DC40D900A06D48 /* pthread_atfork.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pthread_atfork.c; sourceTree = ""; }; @@ -265,6 +346,8 @@ C9DCA2A115DC4F2000D057E2 /* install-manpages.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; path = "install-manpages.sh"; sourceTree = ""; }; E4063CF21906B4FB000202F9 /* qos.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = qos.h; sourceTree = ""; }; E4657D4017284F7B007D1847 /* introspection_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = introspection_private.h; sourceTree = ""; }; + E473BE1719AC305A009C5A52 /* atomic_llsc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = 
atomic_llsc.h; sourceTree = ""; }; + E473BE1819AC305A009C5A52 /* atomic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = atomic.h; sourceTree = ""; }; E4D962F919086AD600E8A9F2 /* qos.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = qos.h; sourceTree = ""; }; E4D962FC19086C5700E8A9F2 /* install-sys-headers.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; path = "install-sys-headers.sh"; sourceTree = ""; }; FC30E28D16A747AD00A25B5F /* synch_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = synch_internal.h; sourceTree = ""; }; @@ -284,6 +367,13 @@ /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ + 6E8C16671B14F08A00C8987C /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; 74E594A01613AAF4006C417B /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; @@ -315,6 +405,37 @@ /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ + 9240BF331AA669EB003C99B4 /* tools */ = { + isa = PBXGroup; + children = ( + 9240BF321AA669C4003C99B4 /* wqtrace.lua */, + ); + name = tools; + path = kern; + sourceTree = ""; + }; + 9264D6821A9D3E010094346B /* tests */ = { + isa = PBXGroup; + children = ( + 9264D6881A9D3E010094346B /* Makefile */, + 9264D6831A9D3E010094346B /* atfork.c */, + 9264D6841A9D3E010094346B /* cond.c */, + 9264D6851A9D3E010094346B /* cond_timed.c */, + 9264D6861A9D3E010094346B /* custom_stack.c */, + 9264D6871A9D3E010094346B /* join.c */, + 9264D6891A9D3E010094346B /* maxwidth.c */, + 9264D68A1A9D3E010094346B /* mutex.c */, + 9264D68B1A9D3E010094346B /* once.c */, + 9264D68C1A9D3E010094346B /* rwlock-signal.c */, + 9264D68D1A9D3E010094346B /* rwlock.c */, + 9264D68E1A9D3E010094346B /* tsd.c */, + 9264D68F1A9D3E010094346B /* wq_block_handoff.c */, + 9264D6901A9D3E010094346B /* wq_kevent.c */, + 9207EB711AA6E008006FFC86 /* wq_kevent_stress.c */, + ); + path = tests; + sourceTree = SOURCE_ROOT; + }; C9169DD91603DE68005A2F8C /* kern */ = { isa = PBXGroup; children = ( @@ -411,6 +532,9 @@ C9D70EBE167AC7D100D52713 /* private */, C948FCC015D187AD00180BF5 /* man */, C9A325ED15B74FB600270056 /* src */, + E4027C171AFC2B6D00ACCF91 /* os */, + 9264D6821A9D3E010094346B /* tests */, + 9240BF331AA669EB003C99B4 /* tools */, C9A3260B15B759A100270056 /* xcodescripts */, C9CA27DA1602813000259F78 /* Frameworks */, C9A325E315B7347000270056 /* Products */, @@ -425,6 +549,7 @@ C90E7A9F15DC3C3800A06D48 /* libpthread.a */, 74E594A41613AAF4006C417B /* libpthread_eOS.a */, C9CA27D91602813000259F78 /* pthread.kext */, + 6E8C16801B14F08A00C8987C /* libsystem_pthread.dylib */, ); name = Products; sourceTree = ""; @@ -433,6 +558,8 @@ isa = PBXGroup; children = ( C9A325F315B7513200270056 /* internal.h */, + 6EB232CA1B0EB29D005915CE /* resolver.h */, + 6EB232C91B0EB29D005915CE /* resolver.c */, C9A325EE15B7513200270056 /* mk_pthread_impl.c */, C9A325EF15B7513200270056 /* plockstat.d */, C9A325FA15B7513200270056 /* pthread.c */, @@ -441,6 +568,7 @@ C9A325F115B7513200270056 /* pthread_cancelable.c */, C9A325F215B7513200270056 /* pthread_cond.c */, C9A325F515B7513200270056 /* pthread_mutex.c */, + 6EB232C81B0EB29D005915CE /* pthread_mutex_up.c */, C9A325F615B7513200270056 /* pthread_rwlock.c */, C975D5DC15C9D16B0098ECD8 /* pthread_support.c */, C9A325F815B7513200270056 /* pthread_tsd.c */, @@ -468,7 
+596,10 @@ isa = PBXGroup; children = ( C91D01BA162893CD0002E29A /* kext.xcconfig */, + 92C577E11A378A85004AF98B /* kext_debug.xcconfig */, C9A3260C15B759B600270056 /* pthread.xcconfig */, + 92C577EA1A378C9C004AF98B /* pthread_debug.xcconfig */, + 6E8C16851B14F14000C8987C /* pthread_introspection.xcconfig */, C99EA612161F8288003EBC56 /* eos.xcconfig */, C9DCA2A115DC4F2000D057E2 /* install-manpages.sh */, C9153094167ACAB8006BB094 /* install-symlinks.sh */, @@ -519,6 +650,15 @@ path = private; sourceTree = ""; }; + E4027C171AFC2B6D00ACCF91 /* os */ = { + isa = PBXGroup; + children = ( + E473BE1819AC305A009C5A52 /* atomic.h */, + E473BE1719AC305A009C5A52 /* atomic_llsc.h */, + ); + path = os; + sourceTree = ""; + }; FC5A372217CEB3D6008C323E /* sys */ = { isa = PBXGroup; children = ( @@ -550,6 +690,29 @@ /* End PBXGroup section */ /* Begin PBXHeadersBuildPhase section */ + 6E8C16681B14F08A00C8987C /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + 6E8C16721B14F08A00C8987C /* atomic_llsc.h in Headers */, + 6E8C16741B14F08A00C8987C /* atomic.h in Headers */, + 6E8C16711B14F08A00C8987C /* posix_sched.h in Headers */, + 6E8C166F1B14F08A00C8987C /* introspection_private.h in Headers */, + 6E8C166C1B14F08A00C8987C /* qos.h in Headers */, + 6E8C16701B14F08A00C8987C /* tsd_private.h in Headers */, + 6E8C16731B14F08A00C8987C /* qos_private.h in Headers */, + 6E8C16761B14F08A00C8987C /* spinlock_private.h in Headers */, + 6E8C16771B14F08A00C8987C /* workqueue_private.h in Headers */, + 6E8C16781B14F08A00C8987C /* private.h in Headers */, + 6E8C16691B14F08A00C8987C /* qos.h in Headers */, + 6E8C166A1B14F08A00C8987C /* pthread.h in Headers */, + 6E8C166B1B14F08A00C8987C /* pthread_impl.h in Headers */, + 6E8C166D1B14F08A00C8987C /* pthread_spis.h in Headers */, + 6E8C166E1B14F08A00C8987C /* sched.h in Headers */, + 6E8C16751B14F08A00C8987C /* spawn.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; 74E594A11613AAF4006C417B /* Headers */ = { isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; @@ -577,7 +740,9 @@ E4657D4117284F7B007D1847 /* introspection_private.h in Headers */, C9BB478D15E6ADF700F135B7 /* tsd_private.h in Headers */, C99AD87B15DEC4BC0009A6F8 /* posix_sched.h in Headers */, + E473BE1919AC305A009C5A52 /* atomic_llsc.h in Headers */, C9CCFB9D18B6D0910060CAAE /* qos_private.h in Headers */, + E473BE1A19AC305A009C5A52 /* atomic.h in Headers */, C98C95D918FF1F4E005654FB /* spawn.h in Headers */, C99AD87C15DEC5290009A6F8 /* spinlock_private.h in Headers */, C9BB478B15E6ABD900F135B7 /* workqueue_private.h in Headers */, @@ -595,6 +760,24 @@ /* End PBXHeadersBuildPhase section */ /* Begin PBXNativeTarget section */ + 6E8C16511B14F08A00C8987C /* libsystem_pthread.dylib introspection */ = { + isa = PBXNativeTarget; + buildConfigurationList = 6E8C167D1B14F08A00C8987C /* Build configuration list for PBXNativeTarget "libsystem_pthread.dylib introspection" */; + buildPhases = ( + 6E8C16521B14F08A00C8987C /* Generate dtrace header */, + 6E8C16531B14F08A00C8987C /* Sources */, + 6E8C16671B14F08A00C8987C /* Frameworks */, + 6E8C16681B14F08A00C8987C /* Headers */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "libsystem_pthread.dylib introspection"; + productName = Libpthread; + productReference = 6E8C16801B14F08A00C8987C /* libsystem_pthread.dylib */; + productType = "com.apple.product-type.library.dynamic"; + }; 74E594911613AAF4006C417B /* libpthread.a eOS */ = { isa = PBXNativeTarget; buildConfigurationList = 
74E594A21613AAF4006C417B /* Build configuration list for PBXNativeTarget "libpthread.a eOS" */; @@ -633,7 +816,6 @@ isa = PBXNativeTarget; buildConfigurationList = C9A325E615B7347000270056 /* Build configuration list for PBXNativeTarget "libsystem_pthread.dylib" */; buildPhases = ( - C99AD87815DEBE450009A6F8 /* Generate dtrace header */, C9A325DE15B7347000270056 /* Sources */, C9A325DF15B7347000270056 /* Frameworks */, C9A325E015B7347000270056 /* Headers */, @@ -677,7 +859,7 @@ C9A325D915B7347000270056 /* Project object */ = { isa = PBXProject; attributes = { - LastUpgradeCheck = 0500; + LastUpgradeCheck = 0700; ORGANIZATIONNAME = ""; }; buildConfigurationList = C9A325DC15B7347000270056 /* Build configuration list for PBXProject "libpthread" */; @@ -695,6 +877,7 @@ C90E7AAC15DC3D3300A06D48 /* All */, C91D01B5162892FF0002E29A /* Kext */, C98832C115DEB44000B3308E /* Embedded */, + 6E8C16511B14F08A00C8987C /* libsystem_pthread.dylib introspection */, C9A325E115B7347000270056 /* libsystem_pthread.dylib */, C90E7A9E15DC3C3800A06D48 /* libpthread.a */, 74E594911613AAF4006C417B /* libpthread.a eOS */, @@ -724,37 +907,37 @@ /* End PBXRezBuildPhase section */ /* Begin PBXShellScriptBuildPhase section */ - C979E9FD18A2BF3D000951E5 /* Install Codes file */ = { + 6E8C16521B14F08A00C8987C /* Generate dtrace header */ = { isa = PBXShellScriptBuildPhase; - buildActionMask = 8; + buildActionMask = 2147483647; files = ( ); inputPaths = ( - "$(SRCROOT)/kern/kern_trace.h", + "$(SRCROOT)/src/plockstat.d", ); - name = "Install Codes file"; + name = "Generate dtrace header"; outputPaths = ( - "$(DSTROOT)$(INSTALL_PATH_PREFIX)/usr/share/misc/pthread.codes", + "$(DERIVED_FILE_DIR)/dtrace/plockstat.h", ); - runOnlyForDeploymentPostprocessing = 1; - shellPath = "/bin/bash -e"; - shellScript = ". \"$PROJECT_DIR\"/xcodescripts/install-codes.sh"; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "dtrace -h -C -s \"${SCRIPT_INPUT_FILE_0}\" -o \"${SCRIPT_OUTPUT_FILE_0}\""; }; - C99AD87815DEBE450009A6F8 /* Generate dtrace header */ = { + C979E9FD18A2BF3D000951E5 /* Install Codes file */ = { isa = PBXShellScriptBuildPhase; - buildActionMask = 2147483647; + buildActionMask = 8; files = ( ); inputPaths = ( - "$(SRCROOT)/src/plockstat.d", + "$(SRCROOT)/kern/kern_trace.h", ); - name = "Generate dtrace header"; + name = "Install Codes file"; outputPaths = ( - "$(DERIVED_FILE_DIR)/dtrace/plockstat.h", + "$(DSTROOT)/usr/local/share/misc/pthread.codes", ); - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "dtrace -h -C -s ${SCRIPT_INPUT_FILE_0} -o ${SCRIPT_OUTPUT_FILE_0}"; + runOnlyForDeploymentPostprocessing = 1; + shellPath = "/bin/bash -e"; + shellScript = ". 
\"$PROJECT_DIR\"/xcodescripts/install-codes.sh"; }; C9A960B518452C1800AE10C8 /* Install lldbmacros */ = { isa = PBXShellScriptBuildPhase; @@ -820,15 +1003,43 @@ /* End PBXShellScriptBuildPhase section */ /* Begin PBXSourcesBuildPhase section */ + 6E8C16531B14F08A00C8987C /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 6E8C16541B14F08A00C8987C /* resolver.c in Sources */, + 6E8C16551B14F08A00C8987C /* pthread.c in Sources */, + 6E8C16561B14F08A00C8987C /* pthread_cancelable.c in Sources */, + 6E8C16571B14F08A00C8987C /* plockstat.d in Sources */, + 6E8C16581B14F08A00C8987C /* pthread_cond.c in Sources */, + 6E8C16591B14F08A00C8987C /* pthread_mutex.c in Sources */, + 6E8C165A1B14F08A00C8987C /* pthread_mutex_up.c in Sources */, + 6E8C165B1B14F08A00C8987C /* qos.c in Sources */, + 6E8C165C1B14F08A00C8987C /* pthread_rwlock.c in Sources */, + 6E8C165D1B14F08A00C8987C /* pthread_tsd.c in Sources */, + 6E8C165E1B14F08A00C8987C /* pthread_cancelable_cancel.c in Sources */, + 6E8C165F1B14F08A00C8987C /* pthread_cancelable_legacy.c in Sources */, + 6E8C16601B14F08A00C8987C /* pthread_cond_legacy.c in Sources */, + 6E8C16611B14F08A00C8987C /* pthread_mutex_legacy.c in Sources */, + 6E8C16621B14F08A00C8987C /* pthread_rwlock_legacy.c in Sources */, + 6E8C16631B14F08A00C8987C /* pthread_support.c in Sources */, + 6E8C16641B14F08A00C8987C /* thread_setup.c in Sources */, + 6E8C16651B14F08A00C8987C /* pthread_atfork.c in Sources */, + 6E8C16661B14F08A00C8987C /* pthread_asm.s in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; 74E594921613AAF4006C417B /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( + 6EB232D01B0EB325005915CE /* resolver.c in Sources */, 74E594931613AAF4006C417B /* pthread.c in Sources */, 74E594941613AAF4006C417B /* pthread_cancelable.c in Sources */, 74E594A61613AB10006C417B /* pthread_cancelable_cancel.c in Sources */, 74E594951613AAF4006C417B /* pthread_cond.c in Sources */, 74E594961613AAF4006C417B /* pthread_mutex.c in Sources */, + 6EB232CF1B0EB321005915CE /* pthread_mutex_up.c in Sources */, 74E594971613AAF4006C417B /* pthread_rwlock.c in Sources */, 74E594981613AAF4006C417B /* pthread_support.c in Sources */, 74E594991613AAF4006C417B /* pthread_tsd.c in Sources */, @@ -843,11 +1054,13 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( + 6EB232CE1B0EB31B005915CE /* resolver.c in Sources */, C9244C1E1860D96D00075748 /* qos.c in Sources */, C90E7AA415DC3C9D00A06D48 /* pthread.c in Sources */, C90E7AA515DC3C9D00A06D48 /* pthread_cancelable.c in Sources */, C90E7AA615DC3C9D00A06D48 /* pthread_cond.c in Sources */, C90E7AA715DC3C9D00A06D48 /* pthread_mutex.c in Sources */, + 6EB232CD1B0EB318005915CE /* pthread_mutex_up.c in Sources */, C90E7AA815DC3C9D00A06D48 /* pthread_rwlock.c in Sources */, C90E7AA915DC3C9D00A06D48 /* pthread_support.c in Sources */, C90E7AAA15DC3C9D00A06D48 /* pthread_tsd.c in Sources */, @@ -861,11 +1074,12 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( + 6EB232CC1B0EB2F0005915CE /* resolver.c in Sources */, C9A1BF4715C9A578006BB313 /* pthread.c in Sources */, C9A1BF4815C9A578006BB313 /* pthread_cancelable.c in Sources */, - FC4DBBB316DEA8DA00503E47 /* plockstat.d in Sources */, C9A1BF4915C9A578006BB313 /* pthread_cond.c in Sources */, C9A1BF4A15C9A578006BB313 /* pthread_mutex.c in Sources */, + 6EB232CB1B0EB2E2005915CE /* pthread_mutex_up.c in Sources */, C9244C1D1860D8EF00075748 /* qos.c in Sources */, 
C9A1BF4B15C9A578006BB313 /* pthread_rwlock.c in Sources */, C9A1BF4C15C9A578006BB313 /* pthread_tsd.c in Sources */, @@ -895,6 +1109,16 @@ /* End PBXSourcesBuildPhase section */ /* Begin PBXTargetDependency section */ + 6E8C16821B14F11800C8987C /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 6E8C16511B14F08A00C8987C /* libsystem_pthread.dylib introspection */; + targetProxy = 6E8C16811B14F11800C8987C /* PBXContainerItemProxy */; + }; + 6E8C16841B14F11B00C8987C /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 6E8C16511B14F08A00C8987C /* libsystem_pthread.dylib introspection */; + targetProxy = 6E8C16831B14F11B00C8987C /* PBXContainerItemProxy */; + }; 74E594AB1613AD7F006C417B /* PBXTargetDependency */ = { isa = PBXTargetDependency; target = 74E594911613AAF4006C417B /* libpthread.a eOS */; @@ -933,31 +1157,120 @@ /* End PBXTargetDependency section */ /* Begin XCBuildConfiguration section */ + 6E8C167E1B14F08A00C8987C /* Release */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 6E8C16851B14F14000C8987C /* pthread_introspection.xcconfig */; + buildSettings = { + EXECUTABLE_PREFIX = lib; + PRODUCT_NAME = system_pthread; + }; + name = Release; + }; + 6E8C167F1B14F08A00C8987C /* Debug */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 6E8C16851B14F14000C8987C /* pthread_introspection.xcconfig */; + buildSettings = { + EXECUTABLE_PREFIX = lib; + PRODUCT_NAME = system_pthread; + }; + name = Debug; + }; 74E594A31613AAF4006C417B /* Release */ = { isa = XCBuildConfiguration; baseConfigurationReference = C99EA612161F8288003EBC56 /* eos.xcconfig */; + buildSettings = { + }; + name = Release; + }; + 92C577E21A378AC8004AF98B /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + COPY_PHASE_STRIP = YES; + }; + name = Debug; + }; + 92C577E31A378AC8004AF98B /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + COMBINE_HIDPI_IMAGES = YES; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + 92C577E41A378AC8004AF98B /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + COMBINE_HIDPI_IMAGES = YES; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + 92C577E51A378AC8004AF98B /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + COMBINE_HIDPI_IMAGES = YES; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + 92C577E61A378AC8004AF98B /* Debug */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 92C577EA1A378C9C004AF98B /* pthread_debug.xcconfig */; + buildSettings = { + EXECUTABLE_PREFIX = lib; + PRODUCT_NAME = system_pthread; + }; + name = Debug; + }; + 92C577E71A378AC8004AF98B /* Debug */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 92C577EA1A378C9C004AF98B /* pthread_debug.xcconfig */; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + BUILD_VARIANTS = normal; + EXECUTABLE_PREFIX = lib; + GCC_PREPROCESSOR_DEFINITIONS = ( + "$(BASE_PREPROCESSOR_MACROS)", + "VARIANT_DYLD=1", + ); + INSTALL_PATH = /usr/local/lib/dyld; + OTHER_LDFLAGS = ""; + PRODUCT_NAME = pthread; + }; + name = Debug; + }; + 92C577E81A378AC8004AF98B /* Debug */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = C99EA612161F8288003EBC56 /* eos.xcconfig */; + buildSettings = { + }; + name = Debug; + }; + 92C577E91A378AC8004AF98B /* Debug */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 92C577E11A378A85004AF98B /* kext_debug.xcconfig */; buildSettings = { COMBINE_HIDPI_IMAGES = YES; + DEBUG_INFORMATION_FORMAT = 
"dwarf-with-dsym"; + PRODUCT_NAME = "$(TARGET_NAME)"; SDKROOT = macosx.internal; }; - name = Release; + name = Debug; }; C90E7AA015DC3C3800A06D48 /* Release */ = { isa = XCBuildConfiguration; baseConfigurationReference = C9A3260C15B759B600270056 /* pthread.xcconfig */; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; - COMBINE_HIDPI_IMAGES = YES; + BUILD_VARIANTS = normal; EXECUTABLE_PREFIX = lib; GCC_PREPROCESSOR_DEFINITIONS = ( "$(BASE_PREPROCESSOR_MACROS)", "VARIANT_DYLD=1", ); INSTALL_PATH = /usr/local/lib/dyld; - "INSTALL_PATH[sdk=iphonesimulator*]" = "$(SDKROOT)/usr/local/lib/dyld"; OTHER_LDFLAGS = ""; PRODUCT_NAME = pthread; - SDKROOT = macosx.internal; }; name = Release; }; @@ -996,14 +1309,8 @@ isa = XCBuildConfiguration; baseConfigurationReference = C9A3260C15B759B600270056 /* pthread.xcconfig */; buildSettings = { - COMBINE_HIDPI_IMAGES = YES; EXECUTABLE_PREFIX = lib; - GCC_PREPROCESSOR_DEFINITIONS = ( - "$(BASE_PREPROCESSOR_MACROS)", - "PLOCKSTAT=1", - ); PRODUCT_NAME = system_pthread; - SDKROOT = macosx.internal; }; name = Release; }; @@ -1021,10 +1328,20 @@ /* End XCBuildConfiguration section */ /* Begin XCConfigurationList section */ + 6E8C167D1B14F08A00C8987C /* Build configuration list for PBXNativeTarget "libsystem_pthread.dylib introspection" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 6E8C167E1B14F08A00C8987C /* Release */, + 6E8C167F1B14F08A00C8987C /* Debug */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; 74E594A21613AAF4006C417B /* Build configuration list for PBXNativeTarget "libpthread.a eOS" */ = { isa = XCConfigurationList; buildConfigurations = ( 74E594A31613AAF4006C417B /* Release */, + 92C577E81A378AC8004AF98B /* Debug */, ); defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; @@ -1033,6 +1350,7 @@ isa = XCConfigurationList; buildConfigurations = ( C90E7AA015DC3C3800A06D48 /* Release */, + 92C577E71A378AC8004AF98B /* Debug */, ); defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; @@ -1041,6 +1359,7 @@ isa = XCConfigurationList; buildConfigurations = ( C90E7AAE15DC3D3300A06D48 /* Release */, + 92C577E31A378AC8004AF98B /* Debug */, ); defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; @@ -1049,6 +1368,7 @@ isa = XCConfigurationList; buildConfigurations = ( C91D01B7162892FF0002E29A /* Release */, + 92C577E41A378AC8004AF98B /* Debug */, ); defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; @@ -1057,6 +1377,7 @@ isa = XCConfigurationList; buildConfigurations = ( C98832C215DEB44000B3308E /* Release */, + 92C577E51A378AC8004AF98B /* Debug */, ); defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; @@ -1065,6 +1386,7 @@ isa = XCConfigurationList; buildConfigurations = ( C9A325E515B7347000270056 /* Release */, + 92C577E21A378AC8004AF98B /* Debug */, ); defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; @@ -1073,6 +1395,7 @@ isa = XCConfigurationList; buildConfigurations = ( C9A325E815B7347000270056 /* Release */, + 92C577E61A378AC8004AF98B /* Debug */, ); defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; @@ -1081,6 +1404,7 @@ isa = XCConfigurationList; buildConfigurations = ( C9CA27E61602813000259F78 /* Release */, + 92C577E91A378AC8004AF98B /* Debug */, ); defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; diff --git a/lldbmacros/pthread.py b/lldbmacros/pthread.py index 6b0c66a..d3d75c7 100644 --- a/lldbmacros/pthread.py +++ b/lldbmacros/pthread.py @@ 
-77,7 +77,7 @@ def GetPthreadSummary(thread): return format.format(threadstr, hex(thread.thread_id), uthreadstr) -@header("{0: <24s} {1: <24s} {2: <10s} {3: <10s} {4: <10s} {5: <10s} {6: <10s}".format('proc', 'wq', 'sched', 'req', 'idle', 'flags', 'wqflags')) +@header("{0: <24s} {1: <24s} {2: <10s} {3: <10s} {4: <10s} {5: <10s} {6: <10s}".format('proc', 'wq', 'sched', 'req', 'idle', 'wq_flags', 'wq_lflags')) def GetPthreadWorkqueueSummary(wq): format = "{0: <24s} {1: <24s} {2: <10d} {3: <10d} {4: <10d} {5: <10s} {6: <10s}" procstr = str("{0: <#020x}".format(wq.wq_proc)) @@ -103,15 +103,17 @@ def GetPthreadWorkqueueSummary(wq): return format.format(procstr, wqstr, wq.wq_threads_scheduled, wq.wq_reqcount, wq.wq_thidlecount, "".join(flags), "".join(wqflags)) -@header("{0: <24s} {1: <5s} {2: <5s} {3: <5s} {4: <5s} {5: <5s} {6: <5s}".format('category', 'uint', 'uinit', 'lgcy', 'util', 'bckgd', 'maint')) +@header("{0: <24s} {1: <5s} {2: <5s} {3: <5s} {4: <5s} {5: <5s} {6: <5s} {7: <5s}".format('category', 'uint', 'uinit', 'lgcy', 'util', 'bckgd', 'maint', 'event')) def GetPthreadWorkqueueDetail(wq): - format = " {0: <22s} {1: <5d} {2: <5d} {3: <5d} {4: <5d} {5: <5d} {6: <5d}" + format = " {0: <22s} {1: <5d} {2: <5d} {3: <5d} {4: <5d} {5: <5d} {6: <5d} {7: <5d}" # requests - reqstr = format.format('requests', wq.wq_requests[0], wq.wq_requests[1], wq.wq_requests[2], wq.wq_requests[3], wq.wq_requests[4], wq.wq_requests[5]) - ocstr = format.format('ocreqs', wq.wq_ocrequests[0], wq.wq_ocrequests[1], wq.wq_ocrequests[2], wq.wq_ocrequests[3], wq.wq_ocrequests[4], wq.wq_ocrequests[5]) - schedstr = format.format('scheduled', wq.wq_thscheduled_count[0], wq.wq_thscheduled_count[1], wq.wq_thscheduled_count[2], wq.wq_thscheduled_count[3], wq.wq_thscheduled_count[4], wq.wq_thscheduled_count[5]) - activestr = format.format('active', wq.wq_thactive_count[0], wq.wq_thactive_count[1], wq.wq_thactive_count[2], wq.wq_thactive_count[3], wq.wq_thactive_count[4], wq.wq_thactive_count[5]) - return "\n".join([reqstr, ocstr, schedstr, activestr]) + reqstr = format.format('requests', wq.wq_requests[0], wq.wq_requests[1], wq.wq_requests[2], wq.wq_requests[3], wq.wq_requests[4], wq.wq_requests[5], wq.wq_requests[6]) + ocstr = format.format('ocreqs', wq.wq_ocrequests[0], wq.wq_ocrequests[1], wq.wq_ocrequests[2], wq.wq_ocrequests[3], wq.wq_ocrequests[4], wq.wq_ocrequests[5], wq.wq_ocrequests[6]) + keventstr = format.format('kevent_reqs', wq.wq_kevent_requests[0], wq.wq_kevent_requests[1], wq.wq_kevent_requests[2], wq.wq_kevent_requests[3], wq.wq_kevent_requests[4], wq.wq_kevent_requests[5], wq.wq_kevent_requests[6]) + ockeventstr = format.format('kevent_ocreqs', wq.wq_kevent_ocrequests[0], wq.wq_kevent_ocrequests[1], wq.wq_kevent_ocrequests[2], wq.wq_kevent_ocrequests[3], wq.wq_kevent_ocrequests[4], wq.wq_kevent_ocrequests[5], wq.wq_kevent_ocrequests[6]) + schedstr = format.format('scheduled', wq.wq_thscheduled_count[0], wq.wq_thscheduled_count[1], wq.wq_thscheduled_count[2], wq.wq_thscheduled_count[3], wq.wq_thscheduled_count[4], wq.wq_thscheduled_count[5], wq.wq_thscheduled_count[6]) + activestr = format.format('active', wq.wq_thactive_count[0], wq.wq_thactive_count[1], wq.wq_thactive_count[2], wq.wq_thactive_count[3], wq.wq_thactive_count[4], wq.wq_thactive_count[5], wq.wq_thactive_count[6]) + return "\n".join([reqstr, ocstr, keventstr, ockeventstr, schedstr, activestr]) @lldb_command('showpthreadstate') def PthreadCurrentMutex(cmd_args=None): @@ -151,4 +153,4 @@ def ShowPthreadWorkqueue(cmd_args=None): print 
GetPthreadWorkqueueDetail(wq)
 
 def __lldb_init_module(debugger, internal_dict):
-    pass
\ No newline at end of file
+    pass
diff --git a/man/pthread_main_np.3 b/man/pthread_main_np.3
new file mode 100644
index 0000000..a2a7d24
--- /dev/null
+++ b/man/pthread_main_np.3
@@ -0,0 +1,57 @@
+.\" Copyright (c) 2003 Alexey Zelkin <phantom@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd February 13, 2003
+.Dt PTHREAD_MAIN_NP 3
+.Os
+.Sh NAME
+.Nm pthread_main_np
+.Nd identify the initial thread
+.Sh LIBRARY
+.Lb libpthread
+.Sh SYNOPSIS
+.In pthread.h
+.Ft int
+.Fn pthread_main_np void
+.Sh DESCRIPTION
+The
+.Fn pthread_main_np
+function
+identifies the initial thread.
+.Sh RETURN VALUES
+The
+.Fn pthread_main_np
+function returns
+1 if the calling thread is the initial thread,
+0 if the calling thread is not the initial thread,
+and \-1 if the thread's initialization has not yet completed.
+.Sh SEE ALSO
+.Xr pthread_create 3 ,
+.Xr pthread_equal 3 ,
+.Xr pthread_self 3
+.Sh AUTHORS
+This manual page was written by
+.An Alexey Zelkin Aq Mt phantom@FreeBSD.org .
diff --git a/os/atomic.h b/os/atomic.h
new file mode 100644
index 0000000..5ea860c
--- /dev/null
+++ b/os/atomic.h
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2008-2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_START@
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_END@
+ */
+
+/*
+ * IMPORTANT: This header file describes INTERNAL interfaces to libplatform
+ * which are subject to change in future releases of Mac OS X. Any applications
+ * relying on these interfaces WILL break.
+ */ + +#ifndef __OS_ATOMIC__ +#define __OS_ATOMIC__ + +// generate error during codegen +#define _os_atomic_unimplemented() \ + ({ __asm__(".err unimplemented"); }) + +#pragma mark - +#pragma mark memory_order + +typedef enum _os_atomic_memory_order +{ + _os_atomic_memory_order_relaxed, + _os_atomic_memory_order_consume, + _os_atomic_memory_order_acquire, + _os_atomic_memory_order_release, + _os_atomic_memory_order_acq_rel, + _os_atomic_memory_order_seq_cst, +} _os_atomic_memory_order; + +#if !OS_ATOMIC_UP + +#define os_atomic_memory_order_relaxed \ + _os_atomic_memory_order_relaxed +#define os_atomic_memory_order_acquire \ + _os_atomic_memory_order_acquire +#define os_atomic_memory_order_release \ + _os_atomic_memory_order_release +#define os_atomic_memory_order_acq_rel \ + _os_atomic_memory_order_acq_rel +#define os_atomic_memory_order_seq_cst \ + _os_atomic_memory_order_seq_cst + +#else // OS_ATOMIC_UP + +#define os_atomic_memory_order_relaxed \ + _os_atomic_memory_order_relaxed +#define os_atomic_memory_order_acquire \ + _os_atomic_memory_order_relaxed +#define os_atomic_memory_order_release \ + _os_atomic_memory_order_relaxed +#define os_atomic_memory_order_acq_rel \ + _os_atomic_memory_order_relaxed +#define os_atomic_memory_order_seq_cst \ + _os_atomic_memory_order_relaxed + +#endif // OS_ATOMIC_UP + +#if __has_extension(c_generic_selections) +#define _os_atomic_basetypeof(p) \ + typeof(*_Generic((p), \ + int*: (int*)(p), \ + volatile int*: (int*)(p), \ + unsigned int*: (unsigned int*)(p), \ + volatile unsigned int*: (unsigned int*)(p), \ + long*: (long*)(p), \ + volatile long*: (long*)(p), \ + unsigned long*: (unsigned long*)(p), \ + volatile unsigned long*: (unsigned long*)(p), \ + long long*: (long long*)(p), \ + volatile long long*: (long long*)(p), \ + unsigned long long*: (unsigned long long*)(p), \ + volatile unsigned long long*: (unsigned long long*)(p), \ + default: (void**)(p))) +#endif + +#if __has_extension(c_atomic) && __has_extension(c_generic_selections) +#pragma mark - +#pragma mark c11 + +#define _os_atomic_c11_atomic(p) \ + _Generic((p), \ + int*: (_Atomic(int)*)(p), \ + volatile int*: (volatile _Atomic(int)*)(p), \ + unsigned int*: (_Atomic(unsigned int)*)(p), \ + volatile unsigned int*: (volatile _Atomic(unsigned int)*)(p), \ + long*: (_Atomic(long)*)(p), \ + volatile long*: (volatile _Atomic(long)*)(p), \ + unsigned long*: (_Atomic(unsigned long)*)(p), \ + volatile unsigned long*: (volatile _Atomic(unsigned long)*)(p), \ + long long*: (_Atomic(long long)*)(p), \ + volatile long long*: (volatile _Atomic(long long)*)(p), \ + unsigned long long*: (_Atomic(unsigned long long)*)(p), \ + volatile unsigned long long*: \ + (volatile _Atomic(unsigned long long)*)(p), \ + default: (volatile _Atomic(void*)*)(p)) + +#define _os_atomic_barrier(m) \ + ({ __c11_atomic_thread_fence(os_atomic_memory_order_##m); }) +#define os_atomic_load(p, m) \ + ({ _os_atomic_basetypeof(p) _r = \ + __c11_atomic_load(_os_atomic_c11_atomic(p), \ + os_atomic_memory_order_##m); (typeof(*(p)))_r; }) +#define os_atomic_store(p, v, m) \ + ({ _os_atomic_basetypeof(p) _v = (v); \ + __c11_atomic_store(_os_atomic_c11_atomic(p), _v, \ + os_atomic_memory_order_##m); }) +#define os_atomic_xchg(p, v, m) \ + ({ _os_atomic_basetypeof(p) _v = (v), _r = \ + __c11_atomic_exchange(_os_atomic_c11_atomic(p), _v, \ + os_atomic_memory_order_##m); (typeof(*(p)))_r; }) +#define os_atomic_cmpxchg(p, e, v, m) \ + ({ _os_atomic_basetypeof(p) _v = (v), _r = (e); \ + 
__c11_atomic_compare_exchange_strong(_os_atomic_c11_atomic(p), \ + &_r, _v, os_atomic_memory_order_##m, \ + os_atomic_memory_order_relaxed); }) +#define os_atomic_cmpxchgv(p, e, v, g, m) \ + ({ _os_atomic_basetypeof(p) _v = (v), _r = (e); _Bool _b = \ + __c11_atomic_compare_exchange_strong(_os_atomic_c11_atomic(p), \ + &_r, _v, os_atomic_memory_order_##m, \ + os_atomic_memory_order_relaxed); *(g) = (typeof(*(p)))_r; _b; }) +#define os_atomic_cmpxchgvw(p, e, v, g, m) \ + ({ _os_atomic_basetypeof(p) _v = (v), _r = (e); _Bool _b = \ + __c11_atomic_compare_exchange_weak(_os_atomic_c11_atomic(p), \ + &_r, _v, os_atomic_memory_order_##m, \ + os_atomic_memory_order_relaxed); *(g) = (typeof(*(p)))_r; _b; }) +#define _os_atomic_c11_op(p, v, m, o, op) \ + ({ _os_atomic_basetypeof(p) _v = (v), _r = \ + __c11_atomic_fetch_##o(_os_atomic_c11_atomic(p), _v, \ + os_atomic_memory_order_##m); (typeof(*(p)))(_r op _v); }) +#define _os_atomic_c11_op_orig(p, v, m, o, op) \ + ({ _os_atomic_basetypeof(p) _v = (v), _r = \ + __c11_atomic_fetch_##o(_os_atomic_c11_atomic(p), _v, \ + os_atomic_memory_order_##m); (typeof(*(p)))_r; }) + +#define os_atomic_add(p, v, m) \ + _os_atomic_c11_op((p), (v), m, add, +) +#define os_atomic_add_orig(p, v, m) \ + _os_atomic_c11_op_orig((p), (v), m, add, +) +#define os_atomic_sub(p, v, m) \ + _os_atomic_c11_op((p), (v), m, sub, -) +#define os_atomic_sub_orig(p, v, m) \ + _os_atomic_c11_op_orig((p), (v), m, sub, -) +#define os_atomic_and(p, v, m) \ + _os_atomic_c11_op((p), (v), m, and, &) +#define os_atomic_and_orig(p, v, m) \ + _os_atomic_c11_op_orig((p), (v), m, and, &) +#define os_atomic_or(p, v, m) \ + _os_atomic_c11_op((p), (v), m, or, |) +#define os_atomic_or_orig(p, v, m) \ + _os_atomic_c11_op_orig((p), (v), m, or, |) +#define os_atomic_xor(p, v, m) \ + _os_atomic_c11_op((p), (v), m, xor, ^) +#define os_atomic_xor_orig(p, v, m) \ + _os_atomic_c11_op_orig((p), (v), m, xor, ^) + +#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) +#pragma mark - +#pragma mark gnu99 + +#define _os_atomic_full_barrier() \ + __sync_synchronize() +#define _os_atomic_barrier(m) \ + ({ switch(os_atomic_memory_order_##m) { \ + case _os_atomic_memory_order_relaxed: \ + break; \ + default: \ + _os_atomic_full_barrier(); break; \ + } }) +// seq_cst: only emulate explicit store(seq_cst) -> load(seq_cst) +#define os_atomic_load(p, m) \ + ({ typeof(*(p)) _r = *(p); \ + switch(os_atomic_memory_order_##m) { \ + case _os_atomic_memory_order_seq_cst: \ + _os_atomic_barrier(m); /* fallthrough */ \ + case _os_atomic_memory_order_relaxed: \ + break; \ + default: \ + _os_atomic_unimplemented(); break; \ + } _r; }) +#define os_atomic_store(p, v, m) \ + ({ switch(os_atomic_memory_order_##m) { \ + case _os_atomic_memory_order_release: \ + case _os_atomic_memory_order_seq_cst: \ + _os_atomic_barrier(m); /* fallthrough */ \ + case _os_atomic_memory_order_relaxed: \ + *(p) = (v); break; \ + default: \ + _os_atomic_unimplemented(); break; \ + } switch(os_atomic_memory_order_##m) { \ + case _os_atomic_memory_order_seq_cst: \ + _os_atomic_barrier(m); break; \ + default: \ + break; \ + } }) +#if __has_builtin(__sync_swap) +#define os_atomic_xchg(p, v, m) \ + ((typeof(*(p)))__sync_swap((p), (v))) +#else +#define os_atomic_xchg(p, v, m) \ + ((typeof(*(p)))__sync_lock_test_and_set((p), (v))) +#endif +#define os_atomic_cmpxchg(p, e, v, m) \ + __sync_bool_compare_and_swap((p), (e), (v)) +#define os_atomic_cmpxchgv(p, e, v, g, m) \ + ({ typeof(*(g)) _e = (e), _r = \ + __sync_val_compare_and_swap((p), _e, (v)); \ + bool 
_b = (_e == _r); *(g) = _r; _b; }) +#define os_atomic_cmpxchgvw(p, e, v, g, m) \ + os_atomic_cmpxchgv((p), (e), (v), (g), m) + +#define os_atomic_add(p, v, m) \ + __sync_add_and_fetch((p), (v)) +#define os_atomic_add_orig(p, v, m) \ + __sync_fetch_and_add((p), (v)) +#define os_atomic_sub(p, v, m) \ + __sync_sub_and_fetch((p), (v)) +#define os_atomic_sub_orig(p, v, m) \ + __sync_fetch_and_sub((p), (v)) +#define os_atomic_and(p, v, m) \ + __sync_and_and_fetch((p), (v)) +#define os_atomic_and_orig(p, v, m) \ + __sync_fetch_and_and((p), (v)) +#define os_atomic_or(p, v, m) \ + __sync_or_and_fetch((p), (v)) +#define os_atomic_or_orig(p, v, m) \ + __sync_fetch_and_or((p), (v)) +#define os_atomic_xor(p, v, m) \ + __sync_xor_and_fetch((p), (v)) +#define os_atomic_xor_orig(p, v, m) \ + __sync_fetch_and_xor((p), (v)) + +#if defined(__x86_64__) || defined(__i386__) +// GCC emits nothing for __sync_synchronize() on x86_64 & i386 +#undef _os_atomic_full_barrier +#define _os_atomic_full_barrier() \ + ({ __asm__ __volatile__( \ + "mfence" \ + : : : "memory"); }) +#undef os_atomic_load +#define os_atomic_load(p, m) \ + ({ switch(os_atomic_memory_order_##m) { \ + case _os_atomic_memory_order_seq_cst: \ + case _os_atomic_memory_order_relaxed: \ + break; \ + default: \ + _os_atomic_unimplemented(); break; \ + } *(p); }) +// xchg is faster than store + mfence +#undef os_atomic_store +#define os_atomic_store(p, v, m) \ + ({ switch(os_atomic_memory_order_##m) { \ + case _os_atomic_memory_order_relaxed: \ + case _os_atomic_memory_order_release: \ + *(p) = (v); break; \ + case _os_atomic_memory_order_seq_cst: \ + (void)os_atomic_xchg((p), (v), m); break; \ + default:\ + _os_atomic_unimplemented(); break; \ + } }) +#endif + +#else +#error "Please upgrade to GCC 4.2 or newer." 
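A note on usage, since os/atomic.h only defines the primitives: the cmpxchgv/cmpxchgvw forms above hand back the value observed on a failed compare-and-swap, which is exactly what the usual CAS retry loop wants (no separate re-load). A minimal sketch of that pattern follows; the state word, flag constant, and function name are illustrative only, not part of this patch:

    #include <stdbool.h>
    #include <stdint.h>
    #include "os/atomic.h"

    #define EXAMPLE_FLAG 0x1u // hypothetical flag bit, for illustration

    // Set a flag bit with a CAS loop; a failed os_atomic_cmpxchgvw refreshes
    // 'oldval' with the currently observed value, so the loop never re-loads.
    static uint32_t
    example_set_flag(volatile uint32_t *state)
    {
        uint32_t oldval = os_atomic_load(state, relaxed);
        uint32_t newval;
        do {
            newval = oldval | EXAMPLE_FLAG;
        } while (!os_atomic_cmpxchgvw(state, oldval, newval, &oldval, acquire));
        return newval;
    }

The weak variant (cmpxchgvw) is the right choice inside a loop, since a spurious failure simply retries.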
+#endif + +#pragma mark - +#pragma mark generic + +// assume atomic builtins provide barriers +#define os_atomic_barrier(m) +// see comment in os_once.c +#define os_atomic_maximally_synchronizing_barrier() \ + _os_atomic_barrier(seq_cst) + +#define os_atomic_load2o(p, f, m) \ + os_atomic_load(&(p)->f, m) +#define os_atomic_store2o(p, f, v, m) \ + os_atomic_store(&(p)->f, (v), m) +#define os_atomic_xchg2o(p, f, v, m) \ + os_atomic_xchg(&(p)->f, (v), m) +#define os_atomic_cmpxchg2o(p, f, e, v, m) \ + os_atomic_cmpxchg(&(p)->f, (e), (v), m) +#define os_atomic_cmpxchgv2o(p, f, e, v, g, m) \ + os_atomic_cmpxchgv(&(p)->f, (e), (v), (g), m) +#define os_atomic_cmpxchgvw2o(p, f, e, v, g, m) \ + os_atomic_cmpxchgvw(&(p)->f, (e), (v), (g), m) +#define os_atomic_add2o(p, f, v, m) \ + os_atomic_add(&(p)->f, (v), m) +#define os_atomic_add_orig2o(p, f, v, m) \ + os_atomic_add_orig(&(p)->f, (v), m) +#define os_atomic_sub2o(p, f, v, m) \ + os_atomic_sub(&(p)->f, (v), m) +#define os_atomic_sub_orig2o(p, f, v, m) \ + os_atomic_sub_orig(&(p)->f, (v), m) +#define os_atomic_and2o(p, f, v, m) \ + os_atomic_and(&(p)->f, (v), m) +#define os_atomic_and_orig2o(p, f, v, m) \ + os_atomic_and_orig(&(p)->f, (v), m) +#define os_atomic_or2o(p, f, v, m) \ + os_atomic_or(&(p)->f, (v), m) +#define os_atomic_or_orig2o(p, f, v, m) \ + os_atomic_or_orig(&(p)->f, (v), m) +#define os_atomic_xor2o(p, f, v, m) \ + os_atomic_xor(&(p)->f, (v), m) +#define os_atomic_xor_orig2o(p, f, v, m) \ + os_atomic_xor_orig(&(p)->f, (v), m) + +#define os_atomic_inc(p, m) \ + os_atomic_add((p), 1, m) +#define os_atomic_inc_orig(p, m) \ + os_atomic_add_orig((p), 1, m) +#define os_atomic_inc2o(p, f, m) \ + os_atomic_add2o(p, f, 1, m) +#define os_atomic_inc_orig2o(p, f, m) \ + os_atomic_add_orig2o(p, f, 1, m) +#define os_atomic_dec(p, m) \ + os_atomic_sub((p), 1, m) +#define os_atomic_dec_orig(p, m) \ + os_atomic_sub_orig((p), 1, m) +#define os_atomic_dec2o(p, f, m) \ + os_atomic_sub2o(p, f, 1, m) +#define os_atomic_dec_orig2o(p, f, m) \ + os_atomic_sub_orig2o(p, f, 1, m) + +#define os_atomic_tsx_xacq_cmpxchgv(p, e, v, g) \ + os_atomic_cmpxchgv((p), (e), (v), (g), acquire) +#define os_atomic_tsx_xrel_store(p, v) \ + os_atomic_store(p, v, release) +#define os_atomic_tsx_xacq_cmpxchgv2o(p, f, e, v, g) \ + os_atomic_tsx_xacq_cmpxchgv(&(p)->f, (e), (v), (g)) +#define os_atomic_tsx_xrel_store2o(p, f, v) \ + os_atomic_tsx_xrel_store(&(p)->f, (v)) + +#if defined(__x86_64__) || defined(__i386__) +#pragma mark - +#pragma mark x86 + +#undef os_atomic_maximally_synchronizing_barrier +#ifdef __LP64__ +#define os_atomic_maximally_synchronizing_barrier() \ + ({ unsigned long _clbr; __asm__ __volatile__( \ + "cpuid" \ + : "=a" (_clbr) : "0" (0) : "rbx", "rcx", "rdx", "cc", "memory"); }) +#else +#ifdef __llvm__ +#define os_atomic_maximally_synchronizing_barrier() \ + ({ unsigned long _clbr; __asm__ __volatile__( \ + "cpuid" \ + : "=a" (_clbr) : "0" (0) : "ebx", "ecx", "edx", "cc", "memory"); }) +#else // gcc does not allow inline i386 asm to clobber ebx +#define os_atomic_maximally_synchronizing_barrier() \ + ({ unsigned long _clbr; __asm__ __volatile__( \ + "pushl %%ebx\n\t" \ + "cpuid\n\t" \ + "popl %%ebx" \ + : "=a" (_clbr) : "0" (0) : "ecx", "edx", "cc", "memory"); }) +#endif +#endif + + +#endif + + +#endif // __OS_ATOMIC__ diff --git a/os/atomic_llsc.h b/os/atomic_llsc.h new file mode 100644 index 0000000..d53e0ae --- /dev/null +++ b/os/atomic_llsc.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. 
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_START@
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @APPLE_APACHE_LICENSE_HEADER_END@
+ */
+
+/*
+ * IMPORTANT: This header file describes INTERNAL interfaces to libplatform
+ * which are subject to change in future releases of Mac OS X. Any applications
+ * relying on these interfaces WILL break.
+ */
+
+// Generated by os/atomic_gen.sh
+
+#ifndef __OS_ATOMIC_LLSC__
+#define __OS_ATOMIC_LLSC__
+
+
+#endif // __OS_ATOMIC_LLSC__
diff --git a/private/qos_private.h b/private/qos_private.h
index 46763fa..77b96df 100644
--- a/private/qos_private.h
+++ b/private/qos_private.h
@@ -40,6 +40,7 @@ typedef unsigned long pthread_priority_t;
 // masks for splitting up the contents of a pthread_priority_t; the mapping from
 // qos_class_t to the class bits, however, is intentionally not exposed.
 #define _PTHREAD_PRIORITY_FLAGS_MASK (~0xffffff)
+#define _PTHREAD_PRIORITY_FLAGS_SHIFT (24ull)
 #define _PTHREAD_PRIORITY_QOS_CLASS_MASK 0x00ffff00
 #define _PTHREAD_PRIORITY_QOS_CLASS_SHIFT (8ull)
 #define _PTHREAD_PRIORITY_PRIORITY_MASK 0x000000ff
@@ -51,6 +52,21 @@ typedef unsigned long pthread_priority_t;
 #define _PTHREAD_PRIORITY_ENFORCE_FLAG 0x10000000
 #define _PTHREAD_PRIORITY_OVERRIDE_FLAG 0x08000000
 
+// libdispatch defines the following, so it's not safe to use for anything we
+// expect to be passed in from userspace
+//#define _PTHREAD_PRIORITY_DEFAULTQUEUE_FLAG 0x04000000
+
+// The event manager flag indicates that this thread/request is for an event
+// manager thread. There can only ever be one event manager thread at a time and
+// it is brought up at the highest of all event manager priorities passed to the
+// kext.
+#define _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG 0x02000000
+
+// Used to indicate to the pthread kext that the provided event manager thread
+// priority is actually a scheduling priority, not a QoS. We can have ROOTQUEUE_FLAG
+// perform double duty because it's never provided to the kernel.
+#define _PTHREAD_PRIORITY_SCHED_PRI_FLAG 0x20000000
+
 // redeffed here to avoid leaving __QOS_ENUM defined in the public header
enum { __VA_ARGS__ }; typedef type name##_t
 #define __QOS_AVAILABLE_STARTING(x, y)
@@ -73,6 +89,8 @@ __QOS_ENUM(_pthread_set_flags, unsigned int,
 		__QOS_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0) = 0x2,
 	_PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG
 		__QOS_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0) = 0x4,
+	_PTHREAD_SET_SELF_TIMESHARE_FLAG
+		__QOS_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_9_0) = 0x8,
 );
 
 #undef __QOS_ENUM
@@ -157,6 +175,11 @@ __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0)
 int
 pthread_set_fixedpriority_self(void);
 
+// Inverse of pthread_set_fixedpriority_self()
+__OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_9_0)
+int
+pthread_set_timeshare_self(void);
+
 #endif
 
 __END_DECLS
diff --git a/private/workqueue_private.h b/private/workqueue_private.h
index 533103f..74b3a1b 100644
--- a/private/workqueue_private.h
+++ b/private/workqueue_private.h
@@ -25,6 +25,7 @@
 #define __PTHREAD_WORKQUEUE_H__
 
 #include
+#include
 #include
 #include
 #include
@@ -32,7 +33,7 @@
 #include
 #endif
 
-#define PTHREAD_WORKQUEUE_SPI_VERSION 20140730
+#define PTHREAD_WORKQUEUE_SPI_VERSION 20150304
 
 /* Feature checking flags, returned by _pthread_workqueue_supported()
  *
@@ -42,6 +43,7 @@
 #define WORKQ_FEATURE_DISPATCHFUNC 0x01 // pthread_workqueue_setdispatch_np is supported (or not)
 #define WORKQ_FEATURE_FINEPRIO 0x02 // fine grained pthread workq priorities
 #define WORKQ_FEATURE_MAINTENANCE 0x10 // QOS class maintenance
+#define WORKQ_FEATURE_KEVENT 0x20 // Support for direct kevent delivery
 
 /* Legacy dispatch priority bands */
 
@@ -62,6 +64,9 @@ __BEGIN_DECLS
 typedef void (*pthread_workqueue_function_t)(int queue_priority, int options, void *ctxt);
 // New callback prototype, used with pthread_workqueue_init
 typedef void (*pthread_workqueue_function2_t)(pthread_priority_t priority);
+// Newer callback prototype, used in conjunction with function2 when there are kevents to deliver;
+// both parameters are in/out parameters
+typedef void (*pthread_workqueue_function_kevent_t)(void **events, int *nevents);
 
 // Initialises the pthread workqueue subsystem, passing the new-style callback prototype,
 // the dispatchoffset and an unused flags field.
@@ -69,6 +74,10 @@ __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0)
 int
 _pthread_workqueue_init(pthread_workqueue_function2_t func, int offset, int flags);
 
+__OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_9_0)
+int
+_pthread_workqueue_init_with_kevent(pthread_workqueue_function2_t queue_func, pthread_workqueue_function_kevent_t kevent_func, int offset, int flags);
+
 // Non-zero enables kill on current thread, zero disables it.
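Stepping back to the qos_private.h masks above: a pthread_priority_t packs flags, opaque QoS class bits, and a relative priority into one word. A hedged decoding sketch, assuming the qos_private.h definitions are in scope; the function name is illustrative and the qos_class_t-to-bits mapping itself stays private, as the header's comment notes:

    #include <stdio.h>

    // Layout, per the masks/shifts in qos_private.h:
    //   bits 24..31  flags (overcommit, event manager, ...)
    //   bits  8..23  QoS class bits (private mapping)
    //   bits  0..7   relative priority field
    static void
    example_decode(pthread_priority_t pp)
    {
        unsigned long flags = (pp & _PTHREAD_PRIORITY_FLAGS_MASK) >> _PTHREAD_PRIORITY_FLAGS_SHIFT;
        unsigned long qosbits = (pp & _PTHREAD_PRIORITY_QOS_CLASS_MASK) >> _PTHREAD_PRIORITY_QOS_CLASS_SHIFT;
        unsigned long relpri = pp & _PTHREAD_PRIORITY_PRIORITY_MASK;

        if (pp & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) {
            // at most one event manager thread exists per process
            printf("event manager request\n");
        }
        printf("flags=%#lx qos-bits=%#lx relpri=%#lx\n", flags, qosbits, relpri);
    }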
__OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_3_2) int @@ -99,6 +108,10 @@ __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0) int _pthread_workqueue_addthreads(int numthreads, pthread_priority_t priority); +__OSX_AVAILABLE_STARTING(__MAC_10_11, __IPHONE_9_0) +int +_pthread_workqueue_set_event_manager_priority(pthread_priority_t priority); + // Apply a QoS override without allocating userspace memory __OSX_AVAILABLE_STARTING(__MAC_10_10, __IPHONE_8_0) int diff --git a/pthread/pthread.h b/pthread/pthread.h index 4169176..c517f26 100644 --- a/pthread/pthread.h +++ b/pthread/pthread.h @@ -150,7 +150,6 @@ __BEGIN_DECLS #define PTHREAD_SCOPE_SYSTEM 1 #define PTHREAD_SCOPE_PROCESS 2 -/* We only support PTHREAD_PROCESS_PRIVATE */ #define PTHREAD_PROCESS_SHARED 1 #define PTHREAD_PROCESS_PRIVATE 2 diff --git a/pthread/pthread_spis.h b/pthread/pthread_spis.h index b76c238..c54d3f5 100644 --- a/pthread/pthread_spis.h +++ b/pthread/pthread_spis.h @@ -73,6 +73,7 @@ int pthread_mutexattr_setpolicy_np(pthread_mutexattr_t *, int ); #endif /* (!_POSIX_C_SOURCE && !_XOPEN_SOURCE) || _DARWIN_C_SOURCE */ + __END_DECLS #endif /* _PTHREAD_SPIS_H */ diff --git a/src/internal.h b/src/internal.h index 057428f..5a13fe3 100644 --- a/src/internal.h +++ b/src/internal.h @@ -59,6 +59,7 @@ struct _pthread_attr_t; /* forward reference */ typedef struct _pthread_attr_t pthread_attr_t; +#include <_simple.h> #include #include #include @@ -176,7 +177,7 @@ typedef struct _pthread { size_t guardsize; // guard page size in bytes // thread specific data - void *tsd[_EXTERNAL_POSIX_THREAD_KEYS_MAX + _INTERNAL_POSIX_THREAD_KEYS_MAX]; + void *tsd[_EXTERNAL_POSIX_THREAD_KEYS_MAX + _INTERNAL_POSIX_THREAD_KEYS_MAX] __attribute__ ((aligned (16))); } *pthread_t; @@ -347,12 +348,15 @@ _pthread_selfid_direct(void) #define _PTHREAD_NO_SIG 0x00000000 #define _PTHREAD_MUTEX_ATTR_SIG 0x4D545841 /* 'MTXA' */ #define _PTHREAD_MUTEX_SIG 0x4D555458 /* 'MUTX' */ +#define _PTHREAD_MUTEX_SIG_fast 0x4D55545A /* 'MUTZ' */ +#define _PTHREAD_MUTEX_SIG_MASK 0xfffffffd +#define _PTHREAD_MUTEX_SIG_CMP 0x4D555458 /* _PTHREAD_MUTEX_SIG & _PTHREAD_MUTEX_SIG_MASK */ #define _PTHREAD_MUTEX_SIG_init 0x32AAABA7 /* [almost] ~'MUTX' */ #define _PTHREAD_ERRORCHECK_MUTEX_SIG_init 0x32AAABA1 #define _PTHREAD_RECURSIVE_MUTEX_SIG_init 0x32AAABA2 -#define _PTHREAD_FIRSTFIT_MUTEX_SIG_init 0x32AAABA3 +#define _PTHREAD_FIRSTFIT_MUTEX_SIG_init 0x32AAABA3 #define _PTHREAD_MUTEX_SIG_init_MASK 0xfffffff0 -#define _PTHREAD_MUTEX_SIG_CMP 0x32AAABA0 +#define _PTHREAD_MUTEX_SIG_init_CMP 0x32AAABA0 #define _PTHREAD_COND_ATTR_SIG 0x434E4441 /* 'CNDA' */ #define _PTHREAD_COND_SIG 0x434F4E44 /* 'COND' */ #define _PTHREAD_COND_SIG_init 0x3CB0B1BB /* [almost] ~'COND' */ @@ -394,6 +398,8 @@ extern boolean_t swtch_pri(int); #define PTHREAD_NORETURN __attribute__((__noreturn__)) #define PTHREAD_ALWAYS_INLINE __attribute__((always_inline)) #define PTHREAD_NOINLINE __attribute__((noinline)) +#define PTHREAD_WEAK __attribute__((weak)) +#define PTHREAD_USED __attribute__((used)) #include "kern/kern_internal.h" @@ -450,13 +456,17 @@ PTHREAD_NOEXPORT void _pthread_set_main_qos(pthread_priority_t qos); +PTHREAD_NOEXPORT +void +_pthread_key_global_init(const char *envp[]); + PTHREAD_EXPORT void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags); PTHREAD_EXPORT void -_pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unused, int reuse); +_pthread_wqthread(pthread_t self, mach_port_t kport, void 
*stackaddr, void *keventlist, int flags, int nkevents);
 
 PTHREAD_NOEXPORT
 void
@@ -525,4 +535,27 @@ _pthread_globals(void)
 		NULL);
 }
 
+#pragma mark _pthread_mutex_check_signature
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+_pthread_mutex_check_signature_fast(_pthread_mutex *mutex)
+{
+	return os_fastpath(mutex->sig == _PTHREAD_MUTEX_SIG_fast);
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+_pthread_mutex_check_signature(_pthread_mutex *mutex)
+{
+	return os_fastpath((mutex->sig & _PTHREAD_MUTEX_SIG_MASK) == _PTHREAD_MUTEX_SIG_CMP);
+}
+
+PTHREAD_ALWAYS_INLINE
+static inline bool
+_pthread_mutex_check_signature_init(_pthread_mutex *mutex)
+{
+	return os_fastpath((mutex->sig & _PTHREAD_MUTEX_SIG_init_MASK) == _PTHREAD_MUTEX_SIG_init_CMP);
+}
+
 #endif /* _POSIX_PTHREAD_INTERNALS_H */
diff --git a/src/pthread.c b/src/pthread.c
index de2d6db..2dec190 100644
--- a/src/pthread.c
+++ b/src/pthread.c
@@ -124,7 +124,9 @@ typedef struct _pthread_reap_msg_t {
 #define pthreadsize ((size_t)mach_vm_round_page(sizeof(struct _pthread)))
 
 static pthread_attr_t _pthread_attr_default = {0};
-static struct _pthread _thread = {0};
+
+// The main thread's pthread_t
+static struct _pthread _thread __attribute__((aligned(4096))) = {0};
 
 static int default_priority;
 static int max_priority;
@@ -133,6 +135,7 @@ static int pthread_concurrency;
 
 // work queue support data
 static void (*__libdispatch_workerfunction)(pthread_priority_t) = NULL;
+static void (*__libdispatch_keventfunction)(void **events, int *nevents) = NULL;
 static int __libdispatch_offset;
 
 // supported feature set
@@ -171,8 +174,8 @@ static inline void _pthread_introspection_thread_start(pthread_t t);
 static inline void _pthread_introspection_thread_terminate(pthread_t t, void *freeaddr, size_t freesize, bool destroy);
 static inline void _pthread_introspection_thread_destroy(pthread_t t);
 
-extern void start_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unused, int reuse);
-extern void thread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
+extern void start_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unused, int reuse); // trampoline into _pthread_wqthread
+extern void thread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags); // trampoline into _pthread_start
 
 void pthread_workqueue_atfork_child(void);
@@ -218,9 +221,9 @@ static const mach_vm_address_t PTHREAD_STACK_HINT = 0xB0000000;
 #error no PTHREAD_STACK_HINT for this architecture
 #endif
 
-#ifdef __i386__
+#if defined(__i386__) && defined(static_assert)
 // Check for regression of <rdar://problem/13249323>
-struct rdar_13249323_regression_static_assert { unsigned a[offsetof(struct _pthread, err_no) == 68 ? 1 : -1]; };
+static_assert(offsetof(struct _pthread, err_no) == 68);
 #endif
 
 // Allocate a thread structure, stack and guard page.
@@ -300,7 +303,7 @@ _pthread_allocate(pthread_t *thread, const pthread_attr_t *attrs, void **stack)
 	}
 
 	if (t != NULL) {
-		_pthread_struct_init(t, attrs, *stack, 0, 0);
+		_pthread_struct_init(t, attrs, *stack, attrs->stacksize, 0);
 		t->freeaddr = (void *)allocaddr;
 		t->freesize = allocsize;
 		*thread = t;
@@ -316,7 +319,9 @@ _pthread_deallocate(pthread_t t)
 {
 	// Don't free the main thread.
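One non-obvious detail in the _pthread_mutex_check_signature* helpers above: the fast-path signature 'MUTZ' (0x4D55545A) differs from the full signature 'MUTX' (0x4D555458) only in bit 1, which _PTHREAD_MUTEX_SIG_MASK (0xfffffffd) clears, so one masked compare accepts both initialized forms; the static-initializer signatures likewise share the 0x32AAABAx prefix under the 0xfffffff0 mask. A quick standalone self-check of that arithmetic (harness is illustrative, not from the patch):

    #include <assert.h>

    int
    main(void)
    {
        // 'MUTX' and 'MUTZ' differ only in bit 1, which the mask clears,
        // so a single masked compare matches both initialized signatures.
        assert((0x4D555458 & 0xfffffffd) == 0x4D555458); // _PTHREAD_MUTEX_SIG
        assert((0x4D55545A & 0xfffffffd) == 0x4D555458); // _PTHREAD_MUTEX_SIG_fast

        // The static initializers fold onto 0x32AAABA0 under the init mask.
        assert((0x32AAABA7 & 0xfffffff0) == 0x32AAABA0); // _PTHREAD_MUTEX_SIG_init
        assert((0x32AAABA1 & 0xfffffff0) == 0x32AAABA0); // errorcheck init
        assert((0x32AAABA2 & 0xfffffff0) == 0x32AAABA0); // recursive init
        return 0;
    }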
if (t != &_thread) { - (void)mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize); + kern_return_t ret; + ret = mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize); + PTHREAD_ASSERT(ret == KERN_SUCCESS); } return 0; } @@ -329,17 +334,15 @@ _pthread_terminate(pthread_t t) PTHREAD_ASSERT(t == pthread_self()); uintptr_t freeaddr = (uintptr_t)t->freeaddr; - size_t freesize = t->freesize - pthreadsize; + size_t freesize = t->freesize; mach_port_t kport = _pthread_kernel_thread(t); semaphore_t joinsem = t->joiner_notify; _pthread_dealloc_reply_port(t); - // Shrink the pthread_t so that it does not include the stack - // so that we're always responsible for deallocating the stack. - t->freeaddr += freesize; - t->freesize = pthreadsize; + // If the pthread_t sticks around after the __bsdthread_terminate, we'll + // need to free it later // After the call to __pthread_remove_thread, it is only safe to // dereference the pthread_t structure if EBUSY has been returned. @@ -350,11 +353,20 @@ _pthread_terminate(pthread_t t) if (t == &_thread) { // Don't free the main thread. freesize = 0; - } else if (destroy) { - // We were told not to keep the pthread_t structure around, so - // instead of just deallocating the stack, we should deallocate - // the entire structure. - freesize += pthreadsize; + } else if (!destroy) { + // We were told to keep the pthread_t structure around. In the common + // case, the pthread structure itself is part of the allocation + // described by freeaddr/freesize, in which case we need to split and + // only deallocate the area below the pthread structure. In the event + // of a custom stack, the freeaddr/size will be the pthread structure + // itself, in which case we shouldn't free anything. + if ((void*)t > t->freeaddr && (void*)t < t->freeaddr + t->freesize){ + freesize = trunc_page((uintptr_t)t - (uintptr_t)freeaddr); + t->freeaddr += freesize; + t->freesize -= freesize; + } else { + freesize = 0; + } } if (freesize == 0) { freeaddr = 0; @@ -646,7 +658,7 @@ void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void *arg, size_t stacksize, unsigned int pflags) { if ((pflags & PTHREAD_START_CUSTOM) == 0) { - void *stackaddr = self; + uintptr_t stackaddr = self; _pthread_struct_init(self, &_pthread_attr_default, stackaddr, stacksize, 1); if (pflags & PTHREAD_START_SETSCHED) { @@ -681,25 +693,30 @@ _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void *ar static void _pthread_struct_init(pthread_t t, const pthread_attr_t *attrs, - void *stack, + void *stackaddr, size_t stacksize, - int kernalloc) + int kernalloc) { t->sig = _PTHREAD_SIG; t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = t; t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); LOCK_INIT(t->lock); + + t->stacksize = stacksize; + t->stackaddr = stackaddr; + t->kernalloc = kernalloc; - if (kernalloc != 0) { - uintptr_t stackaddr = (uintptr_t)t; - t->stacksize = stacksize; - t->stackaddr = (void *)stackaddr; - t->freeaddr = (void *)(uintptr_t)(stackaddr - stacksize - vm_page_size); - t->freesize = pthreadsize + stacksize + vm_page_size; - } else { - t->stacksize = attrs->stacksize; - t->stackaddr = (void *)stack; + if (kernalloc){ + /* + * The pthread may be offset into a page. In that event, by contract + * with the kernel, the allocation will extend pthreadsize from the + * start of the next page. There's also one page worth of allocation + * below stacksize for the guard page. 
+ */ + t->freeaddr = (stackaddr - stacksize) - vm_page_size; + t->freesize = (round_page((uintptr_t)stackaddr) + pthreadsize) - (uintptr_t)t->freeaddr; } + t->guardsize = attrs->guardsize; t->detached = attrs->detached; t->inherit = attrs->inherit; @@ -1590,10 +1607,10 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs, const char *en void *stackaddr; size_t stacksize = DFLSSIZ; - size_t len = sizeof(stackaddr); - int mib[] = { CTL_KERN, KERN_USRSTACK }; - if (__sysctl(mib, 2, &stackaddr, &len, NULL, 0) != 0) { - stackaddr = (void *)USRSTACK; + size_t len = sizeof(stackaddr); + int mib[] = { CTL_KERN, KERN_USRSTACK }; + if (__sysctl(mib, 2, &stackaddr, &len, NULL, 0) != 0) { + stackaddr = (void *)USRSTACK; } pthread_t thread = &_thread; @@ -1612,6 +1629,9 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs, const char *en // Set up kernel entry points with __bsdthread_register. pthread_workqueue_atfork_child(); + // Have pthread_key do its init envvar checks. + _pthread_key_global_init(envp); + return 0; } @@ -1788,12 +1808,7 @@ _pthread_clear_qos_tsd(mach_port_t thread_port) LOCK(_pthread_list_lock); TAILQ_FOREACH(p, &__pthread_head, plist) { - mach_port_t kp; - while ((kp = _pthread_kernel_thread(p)) == MACH_PORT_NULL) { - UNLOCK(_pthread_list_lock); - sched_yield(); - LOCK(_pthread_list_lock); - } + mach_port_t kp = _pthread_kernel_thread(p); if (thread_port == kp) { p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0); break; @@ -1835,26 +1850,37 @@ pthread_workqueue_atfork_child(void) } } +// workqueue entry point from kernel void -_pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unused, int flags) +_pthread_wqthread(pthread_t self, mach_port_t kport, void *stacklowaddr, void *keventlist, int flags, int nkevents) { PTHREAD_ASSERT(flags & WQ_FLAG_THREAD_NEWSPI); int thread_reuse = flags & WQ_FLAG_THREAD_REUSE; int thread_class = flags & WQ_FLAG_THREAD_PRIOMASK; int overcommit = (flags & WQ_FLAG_THREAD_OVERCOMMIT) != 0; + int kevent = flags & WQ_FLAG_THREAD_KEVENT; + PTHREAD_ASSERT((!kevent) || (__libdispatch_keventfunction != NULL)); + + pthread_priority_t priority = 0; + unsigned long priority_flags = 0; - pthread_priority_t priority; + if (overcommit) + priority_flags |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; + if (flags & WQ_FLAG_THREAD_EVENT_MANAGER) + priority_flags |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG; if ((__pthread_supported_features & PTHREAD_FEATURE_QOS_MAINTENANCE) == 0) { - priority = _pthread_priority_make_version2(thread_class, 0, (overcommit ? _PTHREAD_PRIORITY_OVERCOMMIT_FLAG : 0)); + priority = _pthread_priority_make_version2(thread_class, 0, priority_flags); } else { - priority = _pthread_priority_make_newest(thread_class, 0, (overcommit ? _PTHREAD_PRIORITY_OVERCOMMIT_FLAG : 0)); + priority = _pthread_priority_make_newest(thread_class, 0, priority_flags); } if (thread_reuse == 0) { // New thread created by kernel, needs initialization. 
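To make the kernalloc freeaddr/freesize arithmetic above concrete, here is a hedged sketch with made-up numbers: a 4K page, page-aligned stackaddr and a one-page pthread_t, so the round_page() in the real code is a no-op. None of these constants come from the patch:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        // Kernel-allocated layout, low to high addresses:
        //   guard page | stack (grows down toward the guard) | pthread_t (+ TSD)
        uintptr_t stackaddr = 0xB0100000; // made-up stack top / pthread_t base
        size_t stacksize    = 0x7F000;    // made-up
        size_t page         = 0x1000;     // stand-in for vm_page_size
        size_t pthreadsz    = 0x1000;     // stand-in for pthreadsize

        uintptr_t freeaddr = (stackaddr - stacksize) - page;    // 0xB0080000, guard page base
        size_t freesize    = (stackaddr + pthreadsz) - freeaddr; // 0x81000 = guard + stack + pthread_t

        printf("freeaddr=%#lx freesize=%#zx\n", (unsigned long)freeaddr, freesize);
        return 0;
    }

So the free region covers exactly the guard page, the stack, and the pthread_t, which is what _pthread_terminate later splits apart when it must keep the pthread_t alive.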
-		_pthread_struct_init(self, &_pthread_attr_default, stackaddr, DEFAULT_STACK_SIZE, 1);
+		size_t stacksize = (uintptr_t)self - (uintptr_t)stacklowaddr;
+		_pthread_struct_init(self, &_pthread_attr_default, (void*)self, stacksize, 1);
+
 		_pthread_set_kernel_thread(self, kport);
 		self->wqthread = 1;
 		self->wqkillset = 0;
@@ -1868,12 +1894,12 @@
 		_pthread_set_self(self);
 		_pthread_introspection_thread_create(self, false);
 		__pthread_add_thread(self, false);
+	}
 
-	// If we're running with fine-grained priority, we also need to
-	// set this thread to have the QoS class provided to use by the kernel
-	if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) {
-		_pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(thread_class, 0, 0));
-	}
+	// If we're running with fine-grained priority, we also need to
+	// set this thread to have the QoS class provided to us by the kernel
+	if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) {
+		_pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(thread_class, 0, priority_flags));
 	}
 
 #if WQ_DEBUG
@@ -1881,9 +1907,32 @@
 	PTHREAD_ASSERT(self == pthread_self());
 #endif // WQ_DEBUG
 
-	self->fun = (void *(*)(void *))__libdispatch_workerfunction;
+	if (kevent){
+		self->fun = (void *(*)(void*))__libdispatch_keventfunction;
+	} else {
+		self->fun = (void *(*)(void *))__libdispatch_workerfunction;
+	}
 	self->arg = (void *)(uintptr_t)thread_class;
 
+	if (kevent && keventlist){
+		kevent_errors_retry:
+		(*__libdispatch_keventfunction)(&keventlist, &nkevents);
+
+		int errors_out = __workq_kernreturn(WQOPS_THREAD_KEVENT_RETURN, keventlist, nkevents, 0);
+		if (errors_out > 0){
+			nkevents = errors_out;
+			goto kevent_errors_retry;
+		} else if (errors_out < 0){
+			PTHREAD_ABORT("kevent return produced an error: %d", errno);
+		}
+		_pthread_exit(self, NULL);
+	} else if (kevent){
+		(*__libdispatch_keventfunction)(NULL, NULL);
+
+		__workq_kernreturn(WQOPS_THREAD_RETURN, NULL, 0, 0);
+		_pthread_exit(self, NULL);
+	}
+
 	if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) {
 		if (!__workq_newapi) {
 			/* Old thread priorities are inverted from where we have them in
@@ -1951,20 +2000,6 @@ _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unus
 
 /***** pthread workqueue API for libdispatch *****/
 
-int
-_pthread_workqueue_init(pthread_workqueue_function2_t func, int offset, int flags)
-{
-	if (flags != 0) {
-		return ENOTSUP;
-	}
-
-	__workq_newapi = true;
-	__libdispatch_offset = offset;
-
-	int rv = pthread_workqueue_setdispatch_np((pthread_workqueue_function_t)func);
-	return rv;
-}
-
 void
 pthread_workqueue_setdispatchoffset_np(int offset)
 {
@@ -1972,16 +2007,17 @@ pthread_workqueue_setdispatchoffset_np(int offset)
 }
 
 int
-pthread_workqueue_setdispatch_np(pthread_workqueue_function_t worker_func)
+pthread_workqueue_setdispatch_with_kevent_np(pthread_workqueue_function2_t queue_func, pthread_workqueue_function_kevent_t kevent_func)
 {
 	int res = EBUSY;
 	if (__libdispatch_workerfunction == NULL) {
 		// Check whether the kernel supports new SPIs
-		res = __workq_kernreturn(WQOPS_QUEUE_NEWSPISUPP, NULL, __libdispatch_offset, 0);
+		res = __workq_kernreturn(WQOPS_QUEUE_NEWSPISUPP, NULL, __libdispatch_offset, kevent_func != NULL ?
0x01 : 0x00); if (res == -1){ res = ENOTSUP; } else { - __libdispatch_workerfunction = (pthread_workqueue_function2_t)worker_func; + __libdispatch_workerfunction = queue_func; + __libdispatch_keventfunction = kevent_func; // Prepare the kernel for workq action (void)__workq_open(); @@ -1993,6 +2029,32 @@ pthread_workqueue_setdispatch_np(pthread_workqueue_function_t worker_func) return res; } +int +_pthread_workqueue_init_with_kevent(pthread_workqueue_function2_t queue_func, pthread_workqueue_function_kevent_t kevent_func, int offset, int flags) +{ + if (flags != 0) { + return ENOTSUP; + } + + __workq_newapi = true; + __libdispatch_offset = offset; + + int rv = pthread_workqueue_setdispatch_with_kevent_np(queue_func, kevent_func); + return rv; +} + +int +_pthread_workqueue_init(pthread_workqueue_function2_t func, int offset, int flags) +{ + return _pthread_workqueue_init_with_kevent(func, NULL, offset, flags); +} + +int +pthread_workqueue_setdispatch_np(pthread_workqueue_function_t worker_func) +{ + return pthread_workqueue_setdispatch_with_kevent_np((pthread_workqueue_function2_t)worker_func, NULL); +} + int _pthread_workqueue_supported(void) { @@ -2070,6 +2132,16 @@ _pthread_workqueue_addthreads(int numthreads, pthread_priority_t priority) return res; } +int +_pthread_workqueue_set_event_manager_priority(pthread_priority_t priority) +{ + int res = __workq_kernreturn(WQOPS_SET_EVENT_MANAGER_PRIORITY, NULL, (int)priority, 0); + if (res == -1) { + res = errno; + } + return res; +} + /* * Introspection SPI for libpthread. */ diff --git a/src/pthread_asm.s b/src/pthread_asm.s index df03a25..b691763 100644 --- a/src/pthread_asm.s +++ b/src/pthread_asm.s @@ -84,6 +84,7 @@ _start_wqthread: push %ebp mov %esp,%ebp sub $28,%esp // align the stack + mov %esi,20(%esp) //arg5 mov %edi,16(%esp) //arg5 mov %edx,12(%esp) //arg4 mov %ecx,8(%esp) //arg3 @@ -138,6 +139,10 @@ ___pthread_set_self: .align 2 .globl _start_wqthread _start_wqthread: +#if __ARM_ARCH_7K__ + /* align stack to 16 bytes before calling C */ + sub sp, sp, #8 +#endif stmfd sp!, {r4, r5} bl __pthread_wqthread // Stackshots will show the routine that happens to link immediately following @@ -149,6 +154,10 @@ _start_wqthread: .align 2 .globl _thread_start _thread_start: +#if __ARM_ARCH_7K__ + /* align stack to 16 bytes before calling C */ + sub sp, sp, #8 +#endif stmfd sp!, {r4, r5} bl __pthread_start // See above diff --git a/src/pthread_cond.c b/src/pthread_cond.c index a425fbe..ce2683a 100644 --- a/src/pthread_cond.c +++ b/src/pthread_cond.c @@ -163,24 +163,34 @@ _pthread_cond_init(_pthread_cond *cond, const pthread_condattr_t *attr, int conf return 0; } +PTHREAD_NOINLINE static int -_pthread_cond_check_init(_pthread_cond *cond, bool *inited) +_pthread_cond_check_init_slow(_pthread_cond *cond, bool *inited) { - int res = 0; - if (cond->sig != _PTHREAD_COND_SIG) { - res = EINVAL; + int res = EINVAL; + if (cond->sig == _PTHREAD_COND_SIG_init) { + LOCK(cond->lock); if (cond->sig == _PTHREAD_COND_SIG_init) { - LOCK(cond->lock); - if (cond->sig == _PTHREAD_COND_SIG_init) { - res = _pthread_cond_init(cond, NULL, 0); - if (inited) { - *inited = true; - } - } else if (cond->sig == _PTHREAD_COND_SIG) { - res = 0; + res = _pthread_cond_init(cond, NULL, 0); + if (inited) { + *inited = true; } - UNLOCK(cond->lock); + } else if (cond->sig == _PTHREAD_COND_SIG) { + res = 0; } + UNLOCK(cond->lock); + } else if (cond->sig == _PTHREAD_COND_SIG) { + res = 0; + } + return res; +} + +static inline int +_pthread_cond_check_init(_pthread_cond *cond, bool 
*inited) +{ + int res = 0; + if (cond->sig != _PTHREAD_COND_SIG) { + return _pthread_cond_check_init_slow(cond, inited); } return res; } @@ -432,7 +442,8 @@ extern void _pthread_testcancel(pthread_t thread, int isconforming); } if (isconforming) { - if (mutex->sig != _PTHREAD_MUTEX_SIG && (mutex->sig & _PTHREAD_MUTEX_SIG_init_MASK) != _PTHREAD_MUTEX_SIG_CMP) { + if (!_pthread_mutex_check_signature(mutex) && + !_pthread_mutex_check_signature_init(mutex)) { return EINVAL; } if (isconforming > 0) { diff --git a/src/pthread_mutex.c b/src/pthread_mutex.c index 794b6ff..d214739 100644 --- a/src/pthread_mutex.c +++ b/src/pthread_mutex.c @@ -50,10 +50,13 @@ * -- Mutex variable support */ +#include "resolver.h" #include "internal.h" #include "kern/kern_trace.h" #include +#include "os/atomic.h" + #ifdef PLOCKSTAT #include "plockstat.h" #else /* !PLOCKSTAT */ @@ -69,7 +72,19 @@ extern int __unix_conforming; #ifndef BUILDING_VARIANT -PTHREAD_NOEXPORT int __mtx_markprepost(_pthread_mutex *mutex, uint32_t oupdateval, int firstfit); + +PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers +int +_pthread_mutex_unlock_slow(pthread_mutex_t *omutex); + +PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers +int +_pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock); + +PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into _pthread_mutex_lock +int +_pthread_mutex_lock_wait(pthread_mutex_t *omutex, uint64_t newval64, uint64_t oldtid); + #endif /* BUILDING_VARIANT */ #define DEBUG_TRACE_POINTS 0 @@ -83,49 +98,48 @@ extern int __syscall(int number, ...); #include -static int _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, uint32_t static_type); +static inline int _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, uint32_t static_type); #if !__LITTLE_ENDIAN__ #error MUTEX_GETSEQ_ADDR assumes little endian layout of 2 32-bit sequence words #endif -static void +PTHREAD_ALWAYS_INLINE +static inline void MUTEX_GETSEQ_ADDR(_pthread_mutex *mutex, volatile uint64_t **seqaddr) { - if (mutex->mtxopts.options.misalign) { - *seqaddr = (volatile uint64_t *)&mutex->m_seq[1]; - } else { - *seqaddr = (volatile uint64_t *)&mutex->m_seq[0]; - } + // addr of m_seq[1] for misaligned, m_seq[0] for aligned mutex struct + *seqaddr = (volatile uint64_t *)(((uintptr_t)&mutex->m_seq[1]) & ~0x7ul); } -static void +PTHREAD_ALWAYS_INLINE +static inline void MUTEX_GETTID_ADDR(_pthread_mutex *mutex, volatile uint64_t **tidaddr) { - if (mutex->mtxopts.options.misalign) { - *tidaddr = (volatile uint64_t *)&mutex->m_tid[1]; - } else { - *tidaddr = (volatile uint64_t *)&mutex->m_tid[0]; - } + // addr of m_tid[1] for misaligned, m_tid[0] for aligned mutex struct + *tidaddr = (volatile uint64_t *)(((uintptr_t)&mutex->m_tid[1]) & ~0x7ul); } #ifndef BUILDING_VARIANT /* [ */ +#ifndef OS_UP_VARIANT_ONLY #define BLOCK_FAIL_PLOCKSTAT 0 #define BLOCK_SUCCESS_PLOCKSTAT 1 +#ifdef PLOCKSTAT /* This function is never called and exists to provide never-fired dtrace * probes so that user d scripts don't get errors. */ -__private_extern__ __attribute__((used)) void +PTHREAD_NOEXPORT PTHREAD_USED +void _plockstat_never_fired(void) { PLOCKSTAT_MUTEX_SPIN(NULL); PLOCKSTAT_MUTEX_SPUN(NULL, 0, 0); } - +#endif // PLOCKSTAT /* * Initialize a mutex variable, possibly with additional attributes. 
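The refactoring above recurs throughout this patch: the test for a statically initialized object is split into an always-inlined signature check and a PTHREAD_NOINLINE slow path that takes the object's lock and re-tests the signature before initializing (condition variables here, mutexes and rwlocks below). A minimal sketch of that double-checked pattern follows; sync_t, SIG_INIT, SIG_READY, and sync_init are hypothetical placeholders rather than the library's identifiers, while LOCK/UNLOCK and pthread_lock_t stand in for the spinlock primitives used in these sources.

    typedef struct {
        long sig;              // SIG_INIT when statically initialized,
                               // SIG_READY once fully set up
        pthread_lock_t lock;
    } sync_t;

    extern int sync_init(sync_t *s);    // the real initializer (hypothetical)

    __attribute__((noinline))
    static int
    sync_check_init_slow(sync_t *s)
    {
        int res = EINVAL;
        if (s->sig == SIG_INIT) {
            LOCK(s->lock);
            // re-test under the lock: another thread may have completed
            // initialization between our first check and acquiring the lock
            if (s->sig == SIG_INIT) {
                res = sync_init(s);
            } else if (s->sig == SIG_READY) {
                res = 0;
            }
            UNLOCK(s->lock);
        } else if (s->sig == SIG_READY) {
            res = 0;    // raced with another initializer and lost; fine
        }
        return res;
    }

    static inline int
    sync_check_init(sync_t *s)
    {
        // fast path: one load and compare; only leave the inlined code
        // when the object is not yet (or not validly) initialized
        if (s->sig != SIG_READY) {
            return sync_check_init_slow(s);
        }
        return 0;
    }

Keeping the slow path out of line is what makes the common already-initialized case nearly free, which is why the patch annotates these functions PTHREAD_NOINLINE and PTHREAD_ALWAYS_INLINE respectively; the second signature test under the lock guarantees that racing first users observe either the static initializer or a fully constructed object, never a half-built one.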
@@ -137,7 +151,7 @@ pthread_mutex_init(pthread_mutex_t *omutex, const pthread_mutexattr_t *attr) #if 0 /* conformance tests depend on not having this behavior */ /* The test for this behavior is optional */ - if (mutex->sig == _PTHREAD_MUTEX_SIG) + if (_pthread_mutex_check_signature(mutex)) return EBUSY; #endif _pthread_mutex *mutex = (_pthread_mutex *)omutex; @@ -150,7 +164,7 @@ pthread_mutex_getprioceiling(const pthread_mutex_t *omutex, int *prioceiling) { int res = EINVAL; _pthread_mutex *mutex = (_pthread_mutex *)omutex; - if (mutex->sig == _PTHREAD_MUTEX_SIG) { + if (_pthread_mutex_check_signature(mutex)) { LOCK(mutex->lock); *prioceiling = mutex->prioceiling; res = 0; @@ -164,7 +178,7 @@ pthread_mutex_setprioceiling(pthread_mutex_t *omutex, int prioceiling, int *old_ { int res = EINVAL; _pthread_mutex *mutex = (_pthread_mutex *)omutex; - if (mutex->sig == _PTHREAD_MUTEX_SIG) { + if (_pthread_mutex_check_signature(mutex)) { LOCK(mutex->lock); if (prioceiling >= -999 || prioceiling <= 999) { *old_prioceiling = mutex->prioceiling; @@ -337,6 +351,18 @@ pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int pshared) return res; } +PTHREAD_NOEXPORT PTHREAD_WEAK // prevent inlining of return value into callers +int +_pthread_mutex_corruption_abort(_pthread_mutex *mutex); + +PTHREAD_NOINLINE +int +_pthread_mutex_corruption_abort(_pthread_mutex *mutex) +{ + PTHREAD_ABORT("pthread_mutex corruption: mutex %p owner changed in the middle of lock/unlock", mutex); + return EINVAL; // NOTREACHED +} + /* * Sequence numbers and TID: * @@ -359,12 +385,14 @@ pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int pshared) * a thread may get preempted and another thread might see inconsistent data. In the worst case, another * thread may see the TID in the SWITCHING (-1) state or unlocked (0) state for longer because the * owning thread was preempted. + */ /* * Drop the mutex unlock references from cond_wait. or mutex_unlock. */ -__private_extern__ int -__mtx_droplock(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp) +PTHREAD_ALWAYS_INLINE +static inline int +_pthread_mutex_unlock_updatebits(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp) { bool firstfit = (mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT); uint32_t lgenval, ugenval, flags; @@ -379,7 +407,7 @@ __mtx_droplock(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32 uint64_t selfid = _pthread_selfid_direct(); if (*tidaddr != selfid) { - //PTHREAD_ABORT("dropping recur or error mutex not owned by the thread\n"); + //PTHREAD_ABORT("dropping recur or error mutex not owned by the thread"); PLOCKSTAT_MUTEX_ERROR((pthread_mutex_t *)mutex, EPERM); return EPERM; } else if (mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE && @@ -438,9 +466,9 @@ __mtx_droplock(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32 // or 0 so that once the CAS below succeeds, there is no stale ownership information.
// If the CAS of the seqaddr fails, we may loop, but it's still valid for the owner // to be SWITCHING/0 - if (!OSAtomicCompareAndSwap64(oldtid, newtid, (volatile int64_t *)tidaddr)) { + if (!os_atomic_cmpxchg(tidaddr, oldtid, newtid, relaxed)) { // we own this mutex, nobody should be updating it except us - __builtin_trap(); + return _pthread_mutex_corruption_abort(mutex); } } } @@ -456,7 +484,7 @@ __mtx_droplock(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32 newval64 = (((uint64_t)ugenval) << 32); newval64 |= lgenval; - } while (OSAtomicCompareAndSwap64Barrier(oldval64, newval64, (volatile int64_t *)seqaddr) != TRUE); + } while (!os_atomic_cmpxchg(seqaddr, oldval64, newval64, release)); if (clearprepost) { __psynch_cvclrprepost(mutex, lgenval, ugenval, 0, 0, lgenval, (flags | _PTHREAD_MTX_OPT_MUTEX)); @@ -478,8 +506,16 @@ __mtx_droplock(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32 return 0; } -static int -__mtx_updatebits(_pthread_mutex *mutex, uint64_t selfid) +PTHREAD_NOEXPORT +int +__mtx_droplock(_pthread_mutex *mutex, uint32_t *flagsp, uint32_t **pmtxp, uint32_t *mgenp, uint32_t *ugenp) +{ + return _pthread_mutex_unlock_updatebits(mutex, flagsp, pmtxp, mgenp, ugenp); +} + +PTHREAD_ALWAYS_INLINE +static inline int +_pthread_mutex_lock_updatebits(_pthread_mutex *mutex, uint64_t selfid) { int res = 0; int firstfit = (mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT); @@ -527,20 +563,20 @@ __mtx_updatebits(_pthread_mutex *mutex, uint64_t selfid) // set s and b bit // Retry if CAS fails, or if it succeeds with firstfit and E bit already set - } while (OSAtomicCompareAndSwap64Barrier(oldval64, newval64, (volatile int64_t *)seqaddr) != TRUE || - (firstfit && isebit)); + } while (!os_atomic_cmpxchg(seqaddr, oldval64, newval64, acquire) || (firstfit && isebit)); if (res == 0) { - if (!OSAtomicCompareAndSwap64Barrier(oldtid, selfid, (volatile int64_t *)tidaddr)) { + if (!os_atomic_cmpxchg(tidaddr, oldtid, selfid, relaxed)) { // we own this mutex, nobody should be updating it except us - __builtin_trap(); + return _pthread_mutex_corruption_abort(mutex); } } return res; } -int +PTHREAD_NOINLINE +static int __mtx_markprepost(_pthread_mutex *mutex, uint32_t updateval, int firstfit) { uint32_t flags; @@ -570,7 +606,7 @@ __mtx_markprepost(_pthread_mutex *mutex, uint32_t updateval, int firstfit) } newval64 = (((uint64_t)ugenval) << 32); newval64 |= lgenval; - } while (OSAtomicCompareAndSwap64Barrier(oldval64, newval64, (volatile int64_t *)seqaddr) != TRUE); + } while (!os_atomic_cmpxchg(seqaddr, oldval64, newval64, release)); if (clearprepost != 0) { __psynch_cvclrprepost(mutex, lgenval, ugenval, 0, 0, lgenval, (flags | _PTHREAD_MTX_OPT_MUTEX)); @@ -579,50 +615,82 @@ __mtx_markprepost(_pthread_mutex *mutex, uint32_t updateval, int firstfit) return 0; } -static inline bool -_pthread_mutex_check_init_fast(_pthread_mutex *mutex) +PTHREAD_NOINLINE +static int +_pthread_mutex_check_init_slow(pthread_mutex_t *omutex) { - return (mutex->sig == _PTHREAD_MUTEX_SIG); + int res = EINVAL; + _pthread_mutex *mutex = (_pthread_mutex *)omutex; + + if (_pthread_mutex_check_signature_init(mutex)) { + LOCK(mutex->lock); + if (_pthread_mutex_check_signature_init(mutex)) { + // initialize a statically initialized mutex to provide + // compatibility for misbehaving applications. 
+ // (unlock should not be the first operation on a mutex) + res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf)); + } else if (_pthread_mutex_check_signature(mutex)) { + res = 0; + } + UNLOCK(mutex->lock); + } else if (_pthread_mutex_check_signature(mutex)) { + res = 0; + } + if (res != 0) { + PLOCKSTAT_MUTEX_ERROR(omutex, res); + } + return res; } -static int __attribute__((noinline)) +PTHREAD_ALWAYS_INLINE +static inline int _pthread_mutex_check_init(pthread_mutex_t *omutex) { int res = 0; _pthread_mutex *mutex = (_pthread_mutex *)omutex; - if (mutex->sig != _PTHREAD_MUTEX_SIG) { - res = EINVAL; - if ((mutex->sig & _PTHREAD_MUTEX_SIG_init_MASK) == _PTHREAD_MUTEX_SIG_CMP) { - LOCK(mutex->lock); - if ((mutex->sig & _PTHREAD_MUTEX_SIG_init_MASK) == _PTHREAD_MUTEX_SIG_CMP) { - // initialize a statically initialized mutex to provide - // compatibility for misbehaving applications. - // (unlock should not be the first operation on a mutex) - res = _pthread_mutex_init(mutex, NULL, (mutex->sig & 0xf)); - } else if (mutex->sig == _PTHREAD_MUTEX_SIG) { - res = 0; - } - UNLOCK(mutex->lock); - } - if (res != 0) { - PLOCKSTAT_MUTEX_ERROR(omutex, res); - } + if (!_pthread_mutex_check_signature(mutex)) { + return _pthread_mutex_check_init_slow(omutex); } return res; } -static int -_pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) +PTHREAD_NOINLINE +int +_pthread_mutex_lock_wait(pthread_mutex_t *omutex, uint64_t newval64, uint64_t oldtid) +{ + _pthread_mutex *mutex = (_pthread_mutex *)omutex; + uint32_t lgenval = (uint32_t)newval64; + uint32_t ugenval = (uint32_t)(newval64 >> 32); + + volatile uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + uint64_t selfid = _pthread_selfid_direct(); + + PLOCKSTAT_MUTEX_BLOCK(omutex); + do { + uint32_t updateval; + do { + updateval = __psynch_mutexwait(omutex, lgenval, ugenval, oldtid, mutex->mtxopts.value); + oldtid = *tidaddr; + } while (updateval == (uint32_t)-1); + + // returns 0 on successful update; in firstfit it may fail with 1 + } while (_pthread_mutex_lock_updatebits(mutex, selfid) == 1); + PLOCKSTAT_MUTEX_BLOCKED(omutex, BLOCK_SUCCESS_PLOCKSTAT); + + return 0; +} + +int +_pthread_mutex_lock_slow(pthread_mutex_t *omutex, bool trylock) { int res; _pthread_mutex *mutex = (_pthread_mutex *)omutex; - if (os_slowpath(!_pthread_mutex_check_init_fast(mutex))) { - res = _pthread_mutex_check_init(omutex); - if (res != 0) { - return res; - } + res = _pthread_mutex_check_init(omutex); + if (res != 0) { + return res; } uint64_t oldtid; @@ -684,12 +752,10 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) newval64 |= lgenval; // Set S and B bit - } while (OSAtomicCompareAndSwap64Barrier(oldval64, newval64, (volatile int64_t *)seqaddr) == FALSE); + } while (!os_atomic_cmpxchg(seqaddr, oldval64, newval64, acquire)); if (gotlock) { - if (!OSAtomicCompareAndSwap64Barrier(oldtid, selfid, (volatile int64_t *)tidaddr)) { - while (!OSAtomicCompareAndSwap64Barrier(*tidaddr, selfid, (volatile int64_t *)tidaddr)); - } + os_atomic_store(tidaddr, selfid, relaxed); res = 0; DEBUG_TRACE(psynch_mutex_ulock, omutex, lgenval, ugenval, selfid); PLOCKSTAT_MUTEX_ACQUIRE(omutex, 0, 0); @@ -698,18 +764,7 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) DEBUG_TRACE(psynch_mutex_utrylock_failed, omutex, lgenval, ugenval, oldtid); PLOCKSTAT_MUTEX_ERROR(omutex, res); } else { - PLOCKSTAT_MUTEX_BLOCK(omutex); - do { - uint32_t updateval; - do { - updateval = __psynch_mutexwait(omutex, lgenval, ugenval, oldtid, mutex->mtxopts.value); - oldtid =
*tidaddr; - } while (updateval == (uint32_t)-1); - - // returns 0 on succesful update; in firstfit it may fail with 1 - } while (__mtx_updatebits(mutex, selfid) == 1); - res = 0; - PLOCKSTAT_MUTEX_BLOCKED(omutex, BLOCK_SUCCESS_PLOCKSTAT); + res = _pthread_mutex_lock_wait(omutex, newval64, oldtid); } if (res == 0 && mutex->mtxopts.options.type == PTHREAD_MUTEX_RECURSIVE) { @@ -721,24 +776,127 @@ _pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) return res; } +#endif // OS_UP_VARIANT_ONLY + +PTHREAD_ALWAYS_INLINE +static inline int +_pthread_mutex_lock(pthread_mutex_t *omutex, bool trylock) +{ +#if PLOCKSTAT || DEBUG_TRACE_POINTS + if (PLOCKSTAT_MUTEX_ACQUIRE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED() || + DEBUG_TRACE_POINTS) { + return _pthread_mutex_lock_slow(omutex, trylock); + } +#endif + _pthread_mutex *mutex = (_pthread_mutex *)omutex; + if (!_pthread_mutex_check_signature_fast(mutex)) { + return _pthread_mutex_lock_slow(omutex, trylock); + } + + uint64_t oldtid; + volatile uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + uint64_t selfid = _pthread_selfid_direct(); + + uint64_t oldval64, newval64; + volatile uint64_t *seqaddr; + MUTEX_GETSEQ_ADDR(mutex, &seqaddr); + + uint32_t lgenval, ugenval; + bool gotlock = false; + + do { + oldval64 = *seqaddr; + oldtid = *tidaddr; + lgenval = (uint32_t)oldval64; + ugenval = (uint32_t)(oldval64 >> 32); + + gotlock = ((lgenval & PTH_RWL_EBIT) == 0); + + if (trylock && !gotlock) { + // A trylock on a held lock will fail immediately. But since + // we did not load the sequence words atomically, perform a + // no-op CAS64 to ensure that nobody has unlocked concurrently. + } else { + // Increment the lock sequence number and force the lock into E+K + // mode, whether "gotlock" is true or not. + lgenval += PTHRW_INC; + lgenval |= PTH_RWL_EBIT | PTH_RWL_KBIT; + } + + newval64 = (((uint64_t)ugenval) << 32); + newval64 |= lgenval; + + // Set S and B bit + } while (!os_atomic_cmpxchg(seqaddr, oldval64, newval64, acquire)); + + if (os_fastpath(gotlock)) { + os_atomic_store(tidaddr, selfid, relaxed); + return 0; + } else if (trylock) { + return EBUSY; + } else { + return _pthread_mutex_lock_wait(omutex, newval64, oldtid); + } +} + +PTHREAD_NOEXPORT_VARIANT int pthread_mutex_lock(pthread_mutex_t *mutex) { return _pthread_mutex_lock(mutex, false); } +PTHREAD_NOEXPORT_VARIANT int pthread_mutex_trylock(pthread_mutex_t *mutex) { return _pthread_mutex_lock(mutex, true); } +#ifndef OS_UP_VARIANT_ONLY /* * Unlock a mutex. 
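* (The rewritten fast path below publishes the updated sequence words with a release CAS and only falls back to _pthread_mutex_unlock_slow when pending waiters require a kernel wakeup via __psynch_mutexdrop.)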
* TODO: Priority inheritance stuff */ + +PTHREAD_NOINLINE +static int +_pthread_mutex_unlock_drop(pthread_mutex_t *omutex, uint64_t newval64, uint32_t flags) +{ + int res; + _pthread_mutex *mutex = (_pthread_mutex *)omutex; + uint32_t lgenval = (uint32_t)newval64; + uint32_t ugenval = (uint32_t)(newval64 >> 32); + + uint32_t updateval; + int firstfit = (mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT); + volatile uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + + updateval = __psynch_mutexdrop(omutex, lgenval, ugenval, *tidaddr, flags); + + if (updateval == (uint32_t)-1) { + res = errno; + + if (res == EINTR) { + res = 0; + } + if (res != 0) { + PTHREAD_ABORT("__p_mutexdrop failed with error %d", res); + } + return res; + } else if (firstfit == 1) { + if ((updateval & PTH_RWL_PBIT) != 0) { + __mtx_markprepost(mutex, updateval, firstfit); + } + } + + return 0; +} + int -pthread_mutex_unlock(pthread_mutex_t *omutex) +_pthread_mutex_unlock_slow(pthread_mutex_t *omutex) { int res; _pthread_mutex *mutex = (_pthread_mutex *)omutex; @@ -746,51 +904,96 @@ pthread_mutex_unlock(pthread_mutex_t *omutex) // Initialize static mutexes for compatibility with misbehaving // applications (unlock should not be the first operation on a mutex). - if (os_slowpath(!_pthread_mutex_check_init_fast(mutex))) { - res = _pthread_mutex_check_init(omutex); - if (res != 0) { - return res; - } + res = _pthread_mutex_check_init(omutex); + if (res != 0) { + return res; } - res = __mtx_droplock(mutex, &flags, NULL, &mtxgen, &mtxugen); + res = _pthread_mutex_unlock_updatebits(mutex, &flags, NULL, &mtxgen, &mtxugen); if (res != 0) { return res; } if ((flags & _PTHREAD_MTX_OPT_NOTIFY) != 0) { - uint32_t updateval; - int firstfit = (mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FIRSTFIT); + uint64_t newval64; + newval64 = (((uint64_t)mtxugen) << 32); + newval64 |= mtxgen; + return _pthread_mutex_unlock_drop(omutex, newval64, flags); + } else { volatile uint64_t *tidaddr; MUTEX_GETTID_ADDR(mutex, &tidaddr); + DEBUG_TRACE(psynch_mutex_uunlock, omutex, mtxgen, mtxugen, *tidaddr); + } - updateval = __psynch_mutexdrop(omutex, mtxgen, mtxugen, *tidaddr, flags); + return 0; +} - if (updateval == (uint32_t)-1) { - res = errno; +#endif // OS_UP_VARIANT_ONLY - if (res == EINTR) { - res = 0; - } - if (res != 0) { - PTHREAD_ABORT("__p_mutexdrop failed with error %d\n", res); - } - return res; - } else if (firstfit == 1) { - if ((updateval & PTH_RWL_PBIT) != 0) { - __mtx_markprepost(mutex, updateval, firstfit); +PTHREAD_NOEXPORT_VARIANT +int +pthread_mutex_unlock(pthread_mutex_t *omutex) +{ +#if PLOCKSTAT || DEBUG_TRACE_POINTS + if (PLOCKSTAT_MUTEX_RELEASE_ENABLED() || PLOCKSTAT_MUTEX_ERROR_ENABLED() || + DEBUG_TRACE_POINTS) { + return _pthread_mutex_unlock_slow(omutex); + } +#endif + _pthread_mutex *mutex = (_pthread_mutex *)omutex; + if (!_pthread_mutex_check_signature_fast(mutex)) { + return _pthread_mutex_unlock_slow(omutex); + } + + volatile uint64_t *tidaddr; + MUTEX_GETTID_ADDR(mutex, &tidaddr); + + uint64_t oldval64, newval64; + volatile uint64_t *seqaddr; + MUTEX_GETSEQ_ADDR(mutex, &seqaddr); + + uint32_t lgenval, ugenval; + + do { + oldval64 = *seqaddr; + lgenval = (uint32_t)oldval64; + ugenval = (uint32_t)(oldval64 >> 32); + + int numwaiters = diff_genseq(lgenval, ugenval); // pending waiters + + if (numwaiters == 0) { + // spurious unlock; do not touch tid + } else { + ugenval += PTHRW_INC; + + if ((lgenval & PTHRW_COUNT_MASK) == (ugenval & PTHRW_COUNT_MASK)) { + // our unlock sequence matches 
to lock sequence, so if the CAS is successful, the mutex is unlocked + + /* do not reset Ibit, just K&E */ + lgenval &= ~(PTH_RWL_KBIT | PTH_RWL_EBIT); + } else { + return _pthread_mutex_unlock_slow(omutex); } + + // We're giving up the mutex one way or the other, so go ahead and update the owner + // to 0 so that once the CAS below succeeds, there is no stale ownership information. + // If the CAS of the seqaddr fails, we may loop, but it's still valid for the owner + // to be SWITCHING/0 + os_atomic_store(tidaddr, 0, relaxed); } - } else { - volatile uint64_t *tidaddr; - MUTEX_GETTID_ADDR(mutex, &tidaddr); - DEBUG_TRACE(psynch_mutex_uunlock, omutex, mtxgen, mtxugen, *tidaddr); - } + + newval64 = (((uint64_t)ugenval) << 32); + newval64 |= lgenval; + + } while (!os_atomic_cmpxchg(seqaddr, oldval64, newval64, release)); return 0; } -int +#ifndef OS_UP_VARIANT_ONLY + + +static inline int _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, uint32_t static_type) { if (attr) { @@ -843,12 +1046,35 @@ _pthread_mutex_init(_pthread_mutex *mutex, const pthread_mutexattr_t *attr, uint mutex->prioceiling = 0; mutex->priority = 0; - mutex->mtxopts.options.misalign = (((uintptr_t)&mutex->m_seq[0]) & 0x7) != 0; - - // Ensure all contents are properly set before setting signature. - OSMemoryBarrier(); + mutex->mtxopts.options.misalign = (((uintptr_t)&mutex->m_seq[0]) & 0x7ul) != 0; + if (mutex->mtxopts.options.misalign) { + mutex->m_tid[0] = ~0u; + } else { + mutex->m_seq[2] = ~0u; + } + + long sig = _PTHREAD_MUTEX_SIG; + if (mutex->mtxopts.options.type == PTHREAD_MUTEX_NORMAL && + mutex->mtxopts.options.policy == _PTHREAD_MUTEX_POLICY_FAIRSHARE) { + // rdar://18148854 _pthread_mutex_lock & pthread_mutex_unlock fastpath + sig = _PTHREAD_MUTEX_SIG_fast; + } - mutex->sig = _PTHREAD_MUTEX_SIG; + // unused, purely for detecting copied mutexes and smashes during debugging: + mutex->reserved2[0] = ~(uintptr_t)mutex; // use ~ to hide from leaks + mutex->reserved2[1] = (uintptr_t)sig; + + // Ensure all contents are properly set before setting signature. +#if defined(__LP64__) + // For binary compatibility reasons we cannot require natural alignment of + // the 64bit 'sig' long value in the struct. rdar://problem/21610439 + uint32_t *sig32_ptr = (uint32_t*)&mutex->sig; + uint32_t *sig32_val = (uint32_t*)&sig; + *(sig32_ptr+1) = *(sig32_val+1); + os_atomic_store(sig32_ptr, *sig32_val, release); +#else + os_atomic_store2o(mutex, sig, sig, release); +#endif return 0; } @@ -861,7 +1087,7 @@ pthread_mutex_destroy(pthread_mutex_t *omutex) int res = EINVAL; LOCK(mutex->lock); - if (mutex->sig == _PTHREAD_MUTEX_SIG) { + if (_pthread_mutex_check_signature(mutex)) { uint32_t lgenval, ugenval; uint64_t oldval64; volatile uint64_t *seqaddr; @@ -879,7 +1105,7 @@ pthread_mutex_destroy(pthread_mutex_t *omutex) } else { res = EBUSY; } - } else if ((mutex->sig & _PTHREAD_MUTEX_SIG_init_MASK ) == _PTHREAD_MUTEX_SIG_CMP) { + } else if (_pthread_mutex_check_signature_init(mutex)) { mutex->sig = _PTHREAD_NO_SIG; res = 0; } @@ -888,8 +1114,11 @@ pthread_mutex_destroy(pthread_mutex_t *omutex) return res; } +#endif // OS_UP_VARIANT_ONLY + #endif /* !BUILDING_VARIANT ] */ +#ifndef OS_UP_VARIANT_ONLY /* * Destroy a mutex attribute structure. 
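* (No kernel state backs a mutexattr, so destroying one just resets its signature; subsequent use of the destroyed attribute is then rejected with EINVAL.)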
*/ @@ -909,4 +1138,4 @@ pthread_mutexattr_destroy(pthread_mutexattr_t *attr) return 0; } - +#endif // OS_UP_VARIANT_ONLY diff --git a/src/pthread_mutex_up.c b/src/pthread_mutex_up.c new file mode 100644 index 0000000..f983b9a --- /dev/null +++ b/src/pthread_mutex_up.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_APACHE_LICENSE_HEADER_START@ + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @APPLE_APACHE_LICENSE_HEADER_END@ + */ + +#define OS_ATOMIC_UP 1 +#include "resolver_internal.h" + +#ifdef OS_VARIANT_SELECTOR +#define OS_UP_VARIANT_ONLY 1 +#include "pthread_mutex.c" +#endif + +struct _os_empty_files_are_not_c_files; + diff --git a/src/pthread_rwlock.c b/src/pthread_rwlock.c index c7b5373..1b38dd0 100644 --- a/src/pthread_rwlock.c +++ b/src/pthread_rwlock.c @@ -308,25 +308,39 @@ pthread_rwlock_init(pthread_rwlock_t *orwlock, const pthread_rwlockattr_t *attr) return res; } +PTHREAD_NOINLINE +static int +_pthread_rwlock_check_init_slow(pthread_rwlock_t *orwlock) +{ + int res = EINVAL; + _pthread_rwlock *rwlock = (_pthread_rwlock *)orwlock; + + if (rwlock->sig == _PTHREAD_RWLOCK_SIG_init) { + LOCK(rwlock->lock); + if (rwlock->sig == _PTHREAD_RWLOCK_SIG_init) { + res = __pthread_rwlock_init(rwlock, NULL); + } else if (rwlock->sig == _PTHREAD_RWLOCK_SIG){ + res = 0; + } + UNLOCK(rwlock->lock); + } else if (rwlock->sig == _PTHREAD_RWLOCK_SIG){ + res = 0; + } + if (res != 0) { + PLOCKSTAT_RW_ERROR(orwlock, READ_LOCK_PLOCKSTAT, res); + } + return res; +} + +PTHREAD_ALWAYS_INLINE static int _pthread_rwlock_check_init(pthread_rwlock_t *orwlock) { int res = 0; _pthread_rwlock *rwlock = (_pthread_rwlock *)orwlock; + if (rwlock->sig != _PTHREAD_RWLOCK_SIG) { - res = EINVAL; - if (rwlock->sig == _PTHREAD_RWLOCK_SIG_init) { - LOCK(rwlock->lock); - if (rwlock->sig == _PTHREAD_RWLOCK_SIG_init) { - res = __pthread_rwlock_init(rwlock, NULL); - } else if (rwlock->sig == _PTHREAD_RWLOCK_SIG){ - res = 0; - } - UNLOCK(rwlock->lock); - } - if (res != 0) { - PLOCKSTAT_RW_ERROR(orwlock, READ_LOCK_PLOCKSTAT, res); - } + return _pthread_rwlock_check_init_slow(orwlock); } return res; } diff --git a/src/pthread_support.c b/src/pthread_support.c index 5ae4239..5097d60 100644 --- a/src/pthread_support.c +++ b/src/pthread_support.c @@ -24,7 +24,7 @@ #include "internal.h" #include #include <_simple.h> -#include + #define __SIGABRT 6 @@ -45,21 +45,11 @@ __pthread_abort(void) } else { __kill(__getpid(), __SIGABRT, 0); } + __builtin_trap(); } void __pthread_abort_reason(const char *fmt, ...) 
{ -#if !TARGET_OS_EMBEDDED - va_list ap; - const char *str = fmt; - _SIMPLE_STRING s = _simple_salloc(); - va_start(ap, fmt); - if (_simple_vsprintf(s, fmt, ap) == 0) { - str = _simple_string(s); - } - CRSetCrashLogMessage(str); - va_end(ap); -#endif __pthread_abort(); } diff --git a/src/pthread_tsd.c b/src/pthread_tsd.c index e3fffe7..a8d984d 100644 --- a/src/pthread_tsd.c +++ b/src/pthread_tsd.c @@ -65,6 +65,9 @@ static int __pthread_tsd_max = __pthread_tsd_first; static const int __pthread_tsd_start = _INTERNAL_POSIX_THREAD_KEYS_MAX; static const int __pthread_tsd_end = _INTERNAL_POSIX_THREAD_KEYS_END; +static int __pthread_key_legacy_behaviour = 0; +static int __pthread_key_legacy_behaviour_log = 0; + // Omit support for pthread key destructors in the static archive for dyld. // dyld does not create and destroy threads so these are not necessary. // @@ -79,6 +82,15 @@ static struct { static pthread_lock_t tsd_lock = LOCK_INITIALIZER; +// The pthread_tsd destruction order can be reverted to the old (pre-10.11) order +// by setting this environment variable. +void +_pthread_key_global_init(const char *envp[]) +{ + __pthread_key_legacy_behaviour = _simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER") ? 1 : 0; + __pthread_key_legacy_behaviour_log = _simple_getenv(envp, "PTHREAD_KEY_LEGACY_DESTRUCTOR_ORDER_LOG") ? 1 : 0; +} + // Returns true if successful, false if destructor was already set. static bool _pthread_key_set_destructor(pthread_key_t key, void (*destructor)(void *)) @@ -209,10 +221,73 @@ _pthread_tsd_cleanup_key(pthread_t self, pthread_key_t key) } #endif // !VARIANT_DYLD -void -_pthread_tsd_cleanup(pthread_t self) -{ +#import <_simple.h> +#import <dlfcn.h> + #if !VARIANT_DYLD +static void +_pthread_tsd_cleanup_new(pthread_t self) +{ + int j; + + // clean up all keys except the garbage collection key + for (j = 0; j < PTHREAD_DESTRUCTOR_ITERATIONS; j++) { + pthread_key_t k; + for (k = __pthread_tsd_start; k <= self->max_tsd_key; k++) { + _pthread_tsd_cleanup_key(self, k); + } + + for (k = __pthread_tsd_first; k <= __pthread_tsd_max; k++) { + if (k >= __PTK_FRAMEWORK_GC_KEY0 && k <= __PTK_FRAMEWORK_GC_KEY9) { + // GC must be cleaned up last + continue; + } + _pthread_tsd_cleanup_key(self, k); + } + } + + self->max_tsd_key = 0; + + // clean up all the GC keys + for (j = 0; j < PTHREAD_DESTRUCTOR_ITERATIONS; j++) { + pthread_key_t k; + for (k = __PTK_FRAMEWORK_GC_KEY0; k <= __PTK_FRAMEWORK_GC_KEY9; k++) { + _pthread_tsd_cleanup_key(self, k); + } + } +} + +static void +_pthread_tsd_behaviour_check(pthread_t self) +{ + // Iterate from dynamic-key start to dynamic-key end; if the key has both + // a destructor and a value, then _pthread_tsd_cleanup_key would cause + // us to re-trigger the destructor.
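+ // dladdr() is consulted below to name the image that registered the offending destructor, so a dirty key can be traced back to a specific library.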
+ Dl_info i; + pthread_key_t k; + + for (k = __pthread_tsd_start; k <= __pthread_tsd_end; k++) { + void (*destructor)(void *); + if (_pthread_key_get_destructor(k, &destructor)) { + void **ptr = &self->tsd[k]; + void *value = *ptr; + if (value && destructor) { + _simple_asl_log(ASL_LEVEL_ERR, "pthread_tsd", + "warning: dynamic tsd keys dirty after static key cleanup loop."); + + if (dladdr(destructor, &i) == 0) { + _simple_asl_log(ASL_LEVEL_ERR, "pthread_tsd", i.dli_fname); + _simple_asl_log(ASL_LEVEL_ERR, "pthread_tsd", i.dli_saddr); + } + } + } + } + +} + +static void +_pthread_tsd_cleanup_legacy(pthread_t self) +{ int j; // clean up dynamic keys first @@ -231,6 +306,28 @@ _pthread_tsd_cleanup(pthread_t self) for (k = __pthread_tsd_first; k <= __pthread_tsd_max; k++) { _pthread_tsd_cleanup_key(self, k); } + + if (__pthread_key_legacy_behaviour_log != 0 && self->max_tsd_key != 0) { + // max_tsd_key got dirtied, either by static or dynamic keys being + // reset. check for any dirty dynamic keys. + _pthread_tsd_behaviour_check(self); + } + } +} +#endif // !VARIANT_DYLD + +void +_pthread_tsd_cleanup(pthread_t self) +{ +#if !VARIANT_DYLD + + // unless __pthread_key_legacy_behaviour == 1, use the new pthread key + // destructor order: (dynamic -> static) x5 -> (GC x5) + + if (__pthread_key_legacy_behaviour == 0) { + _pthread_tsd_cleanup_new(self); + } else { + _pthread_tsd_cleanup_legacy(self); } #endif // !VARIANT_DYLD } diff --git a/src/qos.c b/src/qos.c index 1213695..485b93b 100644 --- a/src/qos.c +++ b/src/qos.c @@ -42,8 +42,8 @@ static pthread_priority_t _main_qos = QOS_CLASS_UNSPECIFIED; struct pthread_override_s { uint32_t sig; - pthread_t pthread; mach_port_t kthread; + pthread_t pthread; pthread_priority_t priority; bool malloced; }; @@ -344,6 +344,20 @@ pthread_set_fixedpriority_self(void) } } +int +pthread_set_timeshare_self(void) +{ + if (!(__pthread_supported_features & PTHREAD_FEATURE_BSDTHREADCTL)) { + return ENOTSUP; + } + + if (__pthread_supported_features & PTHREAD_FEATURE_SETSELF) { + return _pthread_set_properties_self(_PTHREAD_SET_SELF_TIMESHARE_FLAG, 0, 0); + } else { + return ENOTSUP; + } +} + pthread_override_t pthread_override_qos_class_start_np(pthread_t __pthread, qos_class_t __qos_class, int __relative_priority) diff --git a/src/resolver.c b/src/resolver.c new file mode 100644 index 0000000..cb9ae24 --- /dev/null +++ b/src/resolver.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_APACHE_LICENSE_HEADER_START@ + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @APPLE_APACHE_LICENSE_HEADER_END@ + */ + +#include "resolver_internal.h" + +#define _OS_VARIANT_RESOLVER(s, v, ...) 
\ + __attribute__((visibility(OS_STRINGIFY(v)))) extern void* s(void); \ + void* s(void) { \ + __asm__(".symbol_resolver _" OS_STRINGIFY(s)); \ + __VA_ARGS__ \ + } + +#define _OS_VARIANT_UPMP_RESOLVER(s, v) \ + _OS_VARIANT_RESOLVER(s, v, \ + uint32_t *_c = (void*)(uintptr_t)_COMM_PAGE_CPU_CAPABILITIES; \ + if (*_c & kUP) { \ + extern void OS_VARIANT(s, up)(void); \ + return &OS_VARIANT(s, up); \ + } else { \ + extern void OS_VARIANT(s, mp)(void); \ + return &OS_VARIANT(s, mp); \ + }) + +#define OS_VARIANT_UPMP_RESOLVER(s) \ + _OS_VARIANT_UPMP_RESOLVER(s, default) + +#define OS_VARIANT_UPMP_RESOLVER_INTERNAL(s) \ + _OS_VARIANT_UPMP_RESOLVER(s, hidden) + + +#ifdef OS_VARIANT_SELECTOR + +OS_VARIANT_UPMP_RESOLVER(pthread_mutex_lock) +OS_VARIANT_UPMP_RESOLVER(pthread_mutex_trylock) +OS_VARIANT_UPMP_RESOLVER(pthread_mutex_unlock) + +#endif // OS_VARIANT_SELECTOR diff --git a/src/resolver.h b/src/resolver.h new file mode 100644 index 0000000..b34e65d --- /dev/null +++ b/src/resolver.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_APACHE_LICENSE_HEADER_START@ + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @APPLE_APACHE_LICENSE_HEADER_END@ + */ + +#ifndef __PTHREAD_RESOLVER_H__ +#define __PTHREAD_RESOLVER_H__ + +#include "resolver_internal.h" + +#ifdef OS_VARIANT_SELECTOR + +#if TARGET_OS_EMBEDDED +#define pthread_mutex_lock \ + OS_VARIANT(pthread_mutex_lock, OS_VARIANT_SELECTOR) +#define pthread_mutex_trylock \ + OS_VARIANT(pthread_mutex_trylock, OS_VARIANT_SELECTOR) +#define pthread_mutex_unlock \ + OS_VARIANT(pthread_mutex_unlock, OS_VARIANT_SELECTOR) +#endif // TARGET_OS_EMBEDDED + +#endif // OS_VARIANT_SELECTOR + +#endif // __PTHREAD_RESOLVER_H__ diff --git a/src/resolver_internal.h b/src/resolver_internal.h new file mode 100644 index 0000000..6974e39 --- /dev/null +++ b/src/resolver_internal.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2015 Apple Inc. All rights reserved. + * + * @APPLE_APACHE_LICENSE_HEADER_START@ + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * @APPLE_APACHE_LICENSE_HEADER_END@ + */ + +#ifndef __PTHREAD_RESOLVER_INTERNAL_H__ +#define __PTHREAD_RESOLVER_INTERNAL_H__ + +#include +#include +#include +#if defined(__arm__) +#include +#endif + +#if !defined(PTHREAD_TARGET_EOS) && !defined(VARIANT_DYLD) && \ + defined(_ARM_ARCH_7) && !defined(__ARM_ARCH_7S__) + +#if OS_ATOMIC_UP +#define OS_VARIANT_SELECTOR up +#else +#define OS_VARIANT_SELECTOR mp +#endif + +#endif // !PTHREAD_TARGET_EOS && !VARIANT_DYLD && _ARM_ARCH_7 && !__ARM_ARCH_7S__ + +#define OS_VARIANT(f, v) OS_CONCAT(f, OS_CONCAT($VARIANT$, v)) + +#ifdef OS_VARIANT_SELECTOR +#define PTHREAD_NOEXPORT_VARIANT PTHREAD_NOEXPORT +#else +#define PTHREAD_NOEXPORT_VARIANT +#endif + +#endif // __PTHREAD_RESOLVER_H__ diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 0000000..219bc92 --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,20 @@ +SOURCES := $(wildcard *.c) +TARGETS := $(patsubst %.c,%,$(SOURCES)) +TESTS := $(patsubst %,test-%,$(TARGETS)) + +CFLAGS := -I$(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders + +all: $(TARGETS) + +test check: $(TESTS) + +$(TESTS): test-%: % + @echo "[TEST] $<" + @$(TEST_ENV) ./$< + @echo "[END] $<" + @echo + +clean: + rm -f $(TARGETS) + +.PHONY: all test check clean $(TESTS) diff --git a/tests/atfork.c b/tests/atfork.c new file mode 100644 index 0000000..7b760c0 --- /dev/null +++ b/tests/atfork.c @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include +#include + +#define DECL_ATFORK(x) \ +static void prepare_##x(void) { \ + printf("%d: %s\n", getpid(), __FUNCTION__); \ +} \ +static void parent_##x(void) { \ + printf("%d: %s\n", getpid(), __FUNCTION__); \ +} \ +static void child_##x(void) { \ + printf("%d: %s\n", getpid(), __FUNCTION__); \ +} + +#define ATFORK(x) \ +os_assumes_zero(pthread_atfork(prepare_##x, parent_##x, child_##x)); + +DECL_ATFORK(1); +DECL_ATFORK(2); +DECL_ATFORK(3); +DECL_ATFORK(4); +DECL_ATFORK(5); +DECL_ATFORK(6); +DECL_ATFORK(7); +DECL_ATFORK(8); +DECL_ATFORK(9); +DECL_ATFORK(10); +DECL_ATFORK(11); +DECL_ATFORK(12); +DECL_ATFORK(13); +DECL_ATFORK(14); +DECL_ATFORK(15); +DECL_ATFORK(16); +DECL_ATFORK(17); +DECL_ATFORK(18); +DECL_ATFORK(19); + +int main(int argc, char *argv[]) { + ATFORK(1); + ATFORK(2); + ATFORK(3); + ATFORK(4); + ATFORK(5); + ATFORK(6); + ATFORK(7); + ATFORK(8); + ATFORK(9); + ATFORK(10); + ATFORK(11); + ATFORK(12); + ATFORK(13); + ATFORK(14); + ATFORK(15); + ATFORK(16); + ATFORK(17); + ATFORK(18); + ATFORK(19); + + pid_t pid = fork(); + if (pid == 0) { + pid = fork(); + } + if (pid == -1) { + posix_assumes_zero(pid); + } else if (pid > 0) { + int status; + posix_assumes_zero(waitpid(pid, &status, 0)); + posix_assumes_zero(WEXITSTATUS(status)); + } + return 0; +} diff --git a/tests/cond.c b/tests/cond.c new file mode 100644 index 0000000..65c53ce --- /dev/null +++ b/tests/cond.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct context { + pthread_cond_t cond; + pthread_mutex_t mutex; + long waiters; + long count; +}; + +void *wait_thread(void *ptr) { + int res; + struct context *context = ptr; + + int i = 0; + char *str; + + bool loop = true; + while (loop) { + res = pthread_mutex_lock(&context->mutex); + if (res) { + fprintf(stderr, "[%ld] pthread_mutex_lock: %s\n", context->count, strerror(res)); + abort(); + } + + if (context->count > 0) { + ++context->waiters; + res = pthread_cond_wait(&context->cond, &context->mutex); + if (res) { + fprintf(stderr, "[%ld] pthread_rwlock_unlock: %s\n", 
context->count, strerror(res)); + abort(); + } + --context->waiters; + --context->count; + } else { + loop = false; + } + + res = pthread_mutex_unlock(&context->mutex); + if (res) { + fprintf(stderr, "[%ld] pthread_mutex_unlock: %s\n", context->count, strerror(res)); + abort(); + } + } + + return NULL; +} + +int main(int argc, char *argv[]) +{ + struct context context = { + .cond = PTHREAD_COND_INITIALIZER, + .mutex = PTHREAD_MUTEX_INITIALIZER, + .waiters = 0, + .count = 500000, + }; + int i; + int res; + int threads = 2; + pthread_t p[threads]; + for (i = 0; i < threads; ++i) { + res = pthread_create(&p[i], NULL, wait_thread, &context); + assert(res == 0); + } + + long half = context.count / 2; + + bool loop = true; + while (loop) { + res = pthread_mutex_lock(&context.mutex); + if (res) { + fprintf(stderr, "[%ld] pthread_mutex_lock: %s\n", context.count, strerror(res)); + abort(); + } + if (context.waiters) { + char *str; + if (context.count > half) { + str = "pthread_cond_broadcast"; + res = pthread_cond_broadcast(&context.cond); + } else { + str = "pthread_cond_signal"; + res = pthread_cond_signal(&context.cond); + } + if (res != 0) { + fprintf(stderr, "[%ld] %s: %s\n", context.count, str, strerror(res)); + abort(); + } + } + if (context.count <= 0) { + loop = false; + } + + res = pthread_mutex_unlock(&context.mutex); + if (res) { + fprintf(stderr, "[%ld] pthread_mutex_unlock: %s\n", context.count, strerror(res)); + abort(); + } + } + + + for (i = 0; i < threads; ++i) { + res = pthread_join(p[i], NULL); + assert(res == 0); + } + + return 0; +} diff --git a/tests/cond_timed.c b/tests/cond_timed.c new file mode 100644 index 0000000..318b406 --- /dev/null +++ b/tests/cond_timed.c @@ -0,0 +1,115 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct context { + pthread_cond_t cond; + pthread_mutex_t mutex; + long udelay; + long count; +}; + +void *wait_thread(void *ptr) { + int res; + struct context *context = ptr; + + int i = 0; + char *str; + + bool loop = true; + while (loop) { + struct timespec ts; + struct timeval tv; + gettimeofday(&tv, NULL); + uint64_t ns = tv.tv_usec * NSEC_PER_USEC + context->udelay * NSEC_PER_USEC; + ts.tv_nsec = ns >= NSEC_PER_SEC ? ns % NSEC_PER_SEC : ns; + ts.tv_sec = tv.tv_sec + (ns >= NSEC_PER_SEC ? 
ns / NSEC_PER_SEC : 0); + + res = pthread_mutex_lock(&context->mutex); + if (res) { + fprintf(stderr, "[%ld] pthread_mutex_lock: %s\n", context->count, strerror(res)); + abort(); + } + + if (context->count > 0) { + res = pthread_cond_timedwait(&context->cond, &context->mutex, &ts); + if (res != ETIMEDOUT) { + fprintf(stderr, "[%ld] pthread_cond_timedwait: %s\n", context->count, strerror(res)); + abort(); + } + --context->count; + } else { + loop = false; + } + + res = pthread_mutex_unlock(&context->mutex); + if (res) { + fprintf(stderr, "[%ld] pthread_mutex_unlock: %s\n", context->count, strerror(res)); + abort(); + } + } + + return NULL; +} + +int main(int argc, char *argv[]) +{ + const int threads = 8; + struct context context = { + .cond = PTHREAD_COND_INITIALIZER, + .mutex = PTHREAD_MUTEX_INITIALIZER, + .udelay = 5000, + .count = 8000, + }; + int i; + int res; + + uint64_t uexpected = (context.udelay * context.count) / threads; + printf("waittime expected: %llu us\n", uexpected); + struct timeval start, end; + gettimeofday(&start, NULL); + + pthread_t p[threads]; + for (i = 0; i < threads; ++i) { + res = pthread_create(&p[i], NULL, wait_thread, &context); + assert(res == 0); + } + + usleep(uexpected); + bool loop = true; + while (loop) { + res = pthread_mutex_lock(&context.mutex); + if (res) { + fprintf(stderr, "[%ld] pthread_mutex_lock: %s\n", context.count, strerror(res)); + abort(); + } + if (context.count <= 0) { + loop = false; + } + res = pthread_mutex_unlock(&context.mutex); + if (res) { + fprintf(stderr, "[%ld] pthread_mutex_unlock: %s\n", context.count, strerror(res)); + abort(); + } + } + + for (i = 0; i < threads; ++i) { + res = pthread_join(p[i], NULL); + assert(res == 0); + } + + gettimeofday(&end, NULL); + uint64_t uelapsed = (end.tv_sec * USEC_PER_SEC + end.tv_usec) - + (start.tv_sec * USEC_PER_SEC + start.tv_usec); + printf("waittime actual: %llu us\n", uelapsed); + + return 0; +} diff --git a/tests/custom_stack.c b/tests/custom_stack.c new file mode 100644 index 0000000..1066ea0 --- /dev/null +++ b/tests/custom_stack.c @@ -0,0 +1,38 @@ +#include +#include +#include +#include +#include + +void *function(void *arg) { + // Use the stack... + char buffer[BUFSIZ]; + strlcpy(buffer, arg, sizeof(buffer)); + strlcat(buffer, arg, sizeof(buffer)); + + printf("%s", buffer); + sleep(30); + return (void *)(uintptr_t)strlen(buffer); +} + +int main(int argc, char *argv[]) { + char *arg = "This is a test and only a test of the pthread stackaddr system.\n"; + size_t stacksize = 4096 * 5; + uintptr_t stackaddr = (uintptr_t)valloc(stacksize); + stackaddr += stacksize; // address starts at top of stack. 
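+ // (Stacks grow downward on Darwin, so the address given to pthread_attr_setstackaddr below is the high end of the allocation, not the base pointer valloc returned.)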
+ + pthread_t thread; + pthread_attr_t attr; + + os_assumes_zero(pthread_attr_init(&attr)); + os_assumes_zero(pthread_attr_setstacksize(&attr, stacksize)); + os_assumes_zero(pthread_attr_setstackaddr(&attr, (void *)stackaddr)); + + os_assumes_zero(pthread_create(&thread, &attr, function, arg)); + + void *result; + os_assumes_zero(pthread_join(thread, &result)); + os_assumes((uintptr_t)result == (uintptr_t)strlen(arg)*2); + + return 0; +} diff --git a/tests/join.c b/tests/join.c new file mode 100644 index 0000000..3f9bf40 --- /dev/null +++ b/tests/join.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include + +#define WAITTIME (100 * 1000) + +static inline void* +test(void) +{ + static uintptr_t idx; + printf("Join %lu\n", ++idx); + return (void*)idx; +} + +static void * +thread(void *param) +{ + usleep(WAITTIME); + return param; +} + +static void * +thread1(void *param) +{ + int res; + pthread_t p = param; + + usleep(WAITTIME); + res = pthread_join(p, NULL); + assert(res == 0); + printf("Done\n"); + return 0; +} + +__attribute((noreturn)) +int +main(void) +{ + int res; + kern_return_t kr; + pthread_t p = NULL; + void *param, *value; + + param = test(); + res = pthread_create(&p, NULL, thread, param); + assert(res == 0); + value = NULL; + res = pthread_join(p, &value); + assert(res == 0); + assert(param == value); + + param = test(); + res = pthread_create(&p, NULL, thread, param); + assert(res == 0); + usleep(3 * WAITTIME); + value = NULL; + res = pthread_join(p, &value); + assert(res == 0); + assert(param == value); + + param = test(); + res = pthread_create_suspended_np(&p, NULL, thread, param); + assert(res == 0); + kr = thread_resume(pthread_mach_thread_np(p)); + assert(kr == 0); + value = NULL; + res = pthread_join(p, &value); + assert(res == 0); + assert(param == value); + + param = test(); + res = pthread_create_suspended_np(&p, NULL, thread, param); + assert(res == 0); + kr = thread_resume(pthread_mach_thread_np(p)); + assert(kr == 0); + usleep(3 * WAITTIME); + value = NULL; + res = pthread_join(p, &value); + assert(res == 0); + assert(param == value); + + test(); + param = pthread_self(); + res = pthread_create_suspended_np(&p, NULL, thread1, param); + assert(res == 0); + res = pthread_detach(p); + assert(res == 0); + kr = thread_resume(pthread_mach_thread_np(p)); + assert(kr == 0); + pthread_exit(0); +} + diff --git a/tests/maxwidth.c b/tests/maxwidth.c new file mode 100644 index 0000000..9c1dd04 --- /dev/null +++ b/tests/maxwidth.c @@ -0,0 +1,97 @@ +#include +#include +#include + +#define NUM 100000 +static volatile size_t concur; +static volatile size_t final; +dispatch_queue_t resultsq; +dispatch_group_t rgroup; + +void finish(void* ctxt) +{ + int c = (uintptr_t)ctxt; + if (c > final) final = c; +} + +void work(void* ctxt) +{ + int c = __sync_add_and_fetch(&concur, 1); + if (ctxt) { + usleep(1000); + } else { + for (int i=0; i<100000; i++) { + __asm__ __volatile__ (""); + } + } + dispatch_group_async_f(rgroup, resultsq, (void*)(uintptr_t)c, finish); + __sync_sub_and_fetch(&concur, 1); +} + +int main(int argc, const char *argv[]) +{ + size_t i; + + rgroup = dispatch_group_create(); + resultsq = dispatch_queue_create("results", 0); + dispatch_suspend(resultsq); + + dispatch_group_t group = dispatch_group_create(); + + final = concur = 0; + for (i=0; i +#include +#include +#include +#include +#include +#include +#include + +struct context { + pthread_mutex_t mutex; + long value; + long count; +}; + +void *test_thread(void *ptr) { + int res; + long old; + 
struct context *context = ptr; + + int i = 0; + char *str; + + do { + bool try = i & 1; + + switch (i++ & 1) { + case 0: + str = "pthread_mutex_lock"; + res = pthread_mutex_lock(&context->mutex); + break; + case 1: + str = "pthread_mutex_trylock"; + res = pthread_mutex_trylock(&context->mutex); + break; + } + if (res != 0) { + if (try && res == EBUSY) { + continue; + } + fprintf(stderr, "[%ld] %s: %s\n", context->count, str, strerror(res)); + abort(); + } + + old = __sync_fetch_and_or(&context->value, 1); + if ((old & 1) != 0) { + fprintf(stderr, "[%ld] OR %lx\n", context->count, old); + abort(); + } + + old = __sync_fetch_and_and(&context->value, 0); + if ((old & 1) == 0) { + fprintf(stderr, "[%ld] AND %lx\n", context->count, old); + abort(); + } + + res = pthread_mutex_unlock(&context->mutex); + if (res) { + fprintf(stderr, "[%ld] pthread_mutex_lock: %s\n", context->count, strerror(res)); + abort(); + } + } while (__sync_fetch_and_sub(&context->count, 1) > 0); + exit(0); +} + +int main(int argc, char *argv[]) +{ + struct context context = { + .mutex = PTHREAD_MUTEX_INITIALIZER, + .value = 0, + .count = 5000000, + }; + int i; + int res; + int threads = 16; + pthread_t p[threads]; + for (i = 0; i < threads; ++i) { + res = pthread_create(&p[i], NULL, test_thread, &context); + assert(res == 0); + } + for (i = 0; i < threads; ++i) { + res = pthread_join(p[i], NULL); + assert(res == 0); + } + + return 0; +} diff --git a/tests/once.c b/tests/once.c new file mode 100644 index 0000000..a7005fe --- /dev/null +++ b/tests/once.c @@ -0,0 +1,61 @@ +#define __DARWIN_NON_CANCELABLE 0 +#include +#include +#include +#include + +static pthread_once_t once = PTHREAD_ONCE_INIT; +static int x = 0; + +void cancelled(void) +{ + printf("thread cancelled.\n"); +} + +void oncef(void) +{ + printf("in once handler: %p\n", pthread_self()); + sleep(5); + x = 1; +} + +void* a(void *ctx) +{ + printf("a started: %p\n", pthread_self()); + pthread_cleanup_push((void*)cancelled, NULL); + pthread_once(&once, oncef); + pthread_cleanup_pop(0); + printf("a finished\n"); + return NULL; +} + +void* b(void *ctx) +{ + sleep(1); // give enough time for a() to get into pthread_once + printf("b started: %p\n", pthread_self()); + pthread_once(&once, oncef); + printf("b finished\n"); + return NULL; +} + +int main(void) +{ + pthread_t t1; + if (pthread_create(&t1, NULL, a, NULL) != 0) { + fprintf(stderr, "failed to create thread a."); + exit(1); + } + + pthread_t t2; + if (pthread_create(&t2, NULL, b, NULL) != 0) { + fprintf(stderr, "failed to create thread b."); + exit(1); + } + + sleep(2); + pthread_cancel(t1); + + pthread_join(t1, NULL); + pthread_join(t2, NULL); + exit(0); +} \ No newline at end of file diff --git a/tests/qos.c b/tests/qos.c new file mode 100644 index 0000000..9d464aa --- /dev/null +++ b/tests/qos.c @@ -0,0 +1,135 @@ +/*% clang -o # -Wall -Wextra -I/System/Library/Frameworks/System.framework/PrivateHeaders % + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define QOS_TIER(i) \ + (((i) == THREAD_QOS_UNSPECIFIED) ? QOS_CLASS_UNSPECIFIED : \ + ((i) == THREAD_QOS_USER_INTERACTIVE) ? QOS_CLASS_USER_INTERACTIVE : \ + ((i) == THREAD_QOS_USER_INITIATED) ? QOS_CLASS_USER_INITIATED : \ + ((i) == THREAD_QOS_LEGACY) ? QOS_CLASS_LEGACY : \ + ((i) == THREAD_QOS_UTILITY) ? QOS_CLASS_UTILITY : \ + ((i) == THREAD_QOS_BACKGROUND) ? QOS_CLASS_BACKGROUND : \ + ((i) == THREAD_QOS_MAINTENANCE) ? 
QOS_CLASS_MAINTENANCE : \ + -1) + +struct kern_qos { + long requested; + long override; +}; + +void get_kern_qos(struct kern_qos *kern_qos){ + kern_return_t kr; + boolean_t get_default = false; + mach_msg_type_number_t count; + + struct thread_policy_state thread_policy; + count = THREAD_POLICY_STATE_COUNT; + kr = thread_policy_get(mach_thread_self(), THREAD_POLICY_STATE, (thread_policy_t)&thread_policy, &count, &get_default); + if (kr != KERN_SUCCESS) { mach_error("thread_policy_get(... THREAD_POLICY_STATE ...)", kr); } + + kern_qos->requested = QOS_TIER((thread_policy.requested & POLICY_REQ_TH_QOS_MASK) >> POLICY_REQ_TH_QOS_SHIFT); + kern_qos->override = QOS_TIER((thread_policy.requested & POLICY_REQ_TH_QOS_OVER_MASK) >> POLICY_REQ_TH_QOS_OVER_SHIFT); +} + +void assert_fixedpri(bool fixedpri){ + kern_return_t kr; + boolean_t get_default = false; + mach_msg_type_number_t count; + + thread_extended_policy_data_t extpol; + count = THREAD_EXTENDED_POLICY_COUNT; + kr = thread_policy_get(mach_thread_self(), THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, &count, &get_default); + if (kr != KERN_SUCCESS) { mach_error("thread_policy_get(... THREAD_EXTENDED_POLICY ...)", kr); } + + assert(extpol.timeshare == !fixedpri); +} + +void *assert_thread_qos(void *arg){ + struct kern_qos *correct_kern_qos = (struct kern_qos *)arg; + struct kern_qos actual_kern_qos; + + get_kern_qos(&actual_kern_qos); + + assert(actual_kern_qos.requested == qos_class_self()); + assert(actual_kern_qos.requested == correct_kern_qos->requested); + assert(actual_kern_qos.override == correct_kern_qos->override); + + return NULL; +} + +void *fixedpri_test(void *arg){ + struct kern_qos *correct_kern_qos = (struct kern_qos *)arg; + + assert_thread_qos(correct_kern_qos); + assert_fixedpri(false); + + pthread_set_fixedpriority_self(); + + assert_thread_qos(correct_kern_qos); + assert_fixedpri(true); + + pthread_set_timeshare_self(); + + assert_thread_qos(correct_kern_qos); + assert_fixedpri(false); + + return NULL; +} + +int main(){ + if (geteuid() != 0){ + printf("Must be run as root\n"); + return 1; + } + + struct kern_qos kern_qos; + + pthread_t thread; + pthread_attr_t attr; + pthread_attr_init(&attr); + + // Main thread QoS + + kern_qos.requested = qos_class_self(); + kern_qos.override = QOS_CLASS_UNSPECIFIED; + + assert_thread_qos(&kern_qos); + assert(qos_class_self() == qos_class_main()); + + // Created pthread + + kern_qos.requested = QOS_CLASS_UTILITY; + kern_qos.override = QOS_CLASS_UNSPECIFIED; + + pthread_attr_set_qos_class_np(&attr, QOS_CLASS_UTILITY, 0); + pthread_create(&thread, &attr, assert_thread_qos, &kern_qos); + pthread_join(thread, NULL); + + // pthread_set_fixedpriority_self() + + kern_qos.requested = QOS_CLASS_USER_INITIATED; + kern_qos.override = QOS_CLASS_UNSPECIFIED; + + pthread_attr_set_qos_class_np(&attr, QOS_CLASS_USER_INITIATED, 0); + pthread_create(&thread, &attr, fixedpri_test, &kern_qos); + pthread_join(thread, NULL); + + return 0; +} diff --git a/tests/rwlock-signal.c b/tests/rwlock-signal.c new file mode 100644 index 0000000..c077b0a --- /dev/null +++ b/tests/rwlock-signal.c @@ -0,0 +1,152 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct context { + pthread_rwlock_t rwlock; + long value; + long count; +}; + +void mask_signals(bool masked, bool perthread) +{ + sigset_t mask; + + if (masked) { + sigfillset(&mask); + sigdelset(&mask, SIGINT); + } else { + sigemptyset(&mask); + sigaddset(&mask, SIGWINCH); + } + + int action = (masked ? 
SIG_BLOCK : SIG_UNBLOCK); + if (perthread) { + pthread_sigmask(action, &mask, NULL); + } else { + sigprocmask(action, &mask, NULL); + } +} + +void test_signal(int signo) +{ + /* nothing */ +} + +void *test_signal_thread(void *ptr) +{ + sigset_t mask; + sigfillset(&mask); + sigdelset(&mask, SIGWINCH); + pthread_sigmask(SIG_BLOCK, &mask, NULL); + + struct context *context = ptr; + do { + usleep(100); + kill(getpid(), SIGWINCH); + } while (context->count > 0); + + return NULL; +} + +void *test_thread(void *ptr) { + int res; + long old; + struct context *context = ptr; + + int i = 0; + char *str; + + mask_signals(false, true); + + do { + bool try = i & 1; + bool exclusive = i & 2; + switch (i++ & 3) { + case 0: + str = "pthread_rwlock_rdlock"; + res = pthread_rwlock_rdlock(&context->rwlock); + break; + case 1: + str = "pthread_rwlock_tryrdlock"; + res = pthread_rwlock_tryrdlock(&context->rwlock); + break; + case 2: + str = "pthread_rwlock_wrlock"; + res = pthread_rwlock_wrlock(&context->rwlock); + break; + case 3: + str = "pthread_rwlock_trywrlock"; + res = pthread_rwlock_trywrlock(&context->rwlock); + break; + } + if (res != 0) { + if (try && res == EBUSY) { + continue; + } + fprintf(stderr, "[%ld] %s: %s\n", context->count, str, strerror(res)); + abort(); + } + + if (exclusive) { + old = __sync_fetch_and_or(&context->value, 1); + if ((old & 1) != 0) { + fprintf(stderr, "[%ld] OR %lx\n", context->count, old); + abort(); + } + } + + old = __sync_fetch_and_and(&context->value, 0); + if ((old & 1) != (exclusive ? 1 : 0)) { + fprintf(stderr, "[%ld] AND %lx\n", context->count, old); + abort(); + } + + res = pthread_rwlock_unlock(&context->rwlock); + if (res) { + fprintf(stderr, "[%ld] pthread_rwlock_unlock: %s\n", context->count, strerror(res)); + abort(); + } + } while (__sync_fetch_and_sub(&context->count, 1) > 0); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + struct context context = { + .rwlock = PTHREAD_RWLOCK_INITIALIZER, + .value = 0, + .count = 5000000, + }; + int i; + int res; + int threads = 16; + pthread_t p[threads+1]; + + mask_signals(true, false); + signal(SIGWINCH, test_signal); + + for (i = 0; i < threads; ++i) { + res = pthread_create(&p[i], NULL, test_thread, &context); + assert(res == 0); + } + + pthread_create(&p[threads], NULL, test_signal_thread, &context); + assert(res == 0); + + for (i = 0; i < threads; ++i) { + res = pthread_join(p[i], NULL); + assert(res == 0); + } + res = pthread_join(p[threads], NULL); + assert(res == 0); + + return 0; +} diff --git a/tests/rwlock.c b/tests/rwlock.c new file mode 100644 index 0000000..db6c929 --- /dev/null +++ b/tests/rwlock.c @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +struct context { + pthread_rwlock_t rwlock; + long value; + long count; +}; + +void *test_thread(void *ptr) { + int res; + long old; + struct context *context = ptr; + + int i = 0; + char *str; + + do { + bool try = i & 1; + bool exclusive = i & 2; + switch (i++ & 3) { + case 0: + str = "pthread_rwlock_rdlock"; + res = pthread_rwlock_rdlock(&context->rwlock); + break; + case 1: + str = "pthread_rwlock_tryrdlock"; + res = pthread_rwlock_tryrdlock(&context->rwlock); + break; + case 2: + str = "pthread_rwlock_wrlock"; + res = pthread_rwlock_wrlock(&context->rwlock); + break; + case 3: + str = "pthread_rwlock_trywrlock"; + res = pthread_rwlock_trywrlock(&context->rwlock); + break; + } + if (res != 0) { + if (try && res == EBUSY) { + continue; + } + fprintf(stderr, "[%ld] %s: %s\n", context->count, str, 
diff --git a/tests/rwlock.c b/tests/rwlock.c
new file mode 100644
index 0000000..db6c929
--- /dev/null
+++ b/tests/rwlock.c
@@ -0,0 +1,98 @@
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+struct context {
+	pthread_rwlock_t rwlock;
+	long value;
+	long count;
+};
+
+void *test_thread(void *ptr) {
+	int res;
+	long old;
+	struct context *context = ptr;
+
+	int i = 0;
+	char *str;
+
+	do {
+		bool try = i & 1;
+		bool exclusive = i & 2;
+		switch (i++ & 3) {
+		case 0:
+			str = "pthread_rwlock_rdlock";
+			res = pthread_rwlock_rdlock(&context->rwlock);
+			break;
+		case 1:
+			str = "pthread_rwlock_tryrdlock";
+			res = pthread_rwlock_tryrdlock(&context->rwlock);
+			break;
+		case 2:
+			str = "pthread_rwlock_wrlock";
+			res = pthread_rwlock_wrlock(&context->rwlock);
+			break;
+		case 3:
+			str = "pthread_rwlock_trywrlock";
+			res = pthread_rwlock_trywrlock(&context->rwlock);
+			break;
+		}
+		if (res != 0) {
+			if (try && res == EBUSY) {
+				continue;
+			}
+			fprintf(stderr, "[%ld] %s: %s\n", context->count, str, strerror(res));
+			abort();
+		}
+
+		if (exclusive) {
+			old = __sync_fetch_and_or(&context->value, 1);
+			if ((old & 1) != 0) {
+				fprintf(stderr, "[%ld] OR %lx\n", context->count, old);
+				abort();
+			}
+		}
+
+		old = __sync_fetch_and_and(&context->value, 0);
+		if ((old & 1) != (exclusive ? 1 : 0)) {
+			fprintf(stderr, "[%ld] AND %lx\n", context->count, old);
+			abort();
+		}
+
+		res = pthread_rwlock_unlock(&context->rwlock);
+		if (res) {
+			fprintf(stderr, "[%ld] pthread_rwlock_unlock: %s\n", context->count, strerror(res));
+			abort();
+		}
+	} while (__sync_fetch_and_sub(&context->count, 1) > 0);
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	struct context context = {
+		.rwlock = PTHREAD_RWLOCK_INITIALIZER,
+		.value = 0,
+		.count = 5000000,
+	};
+	int i;
+	int res;
+	int threads = 16;
+	pthread_t p[threads];
+	for (i = 0; i < threads; ++i) {
+		res = pthread_create(&p[i], NULL, test_thread, &context);
+		assert(res == 0);
+	}
+	for (i = 0; i < threads; ++i) {
+		res = pthread_join(p[i], NULL);
+		assert(res == 0);
+	}
+
+	return 0;
+}
diff --git a/tests/tsd.c b/tests/tsd.c
new file mode 100644
index 0000000..998d75d
--- /dev/null
+++ b/tests/tsd.c
@@ -0,0 +1,55 @@
+#include <assert.h>
+#include <pthread.h>
+#include <stdio.h>
+
+void *ptr = NULL;
+
+void destructor(void *value)
+{
+	ptr = value;
+}
+
+void *thread(void *param)
+{
+	int res;
+
+	pthread_key_t key = *(pthread_key_t *)param;
+	res = pthread_setspecific(key, (void *)0x12345678);
+	assert(res == 0);
+	void *value = pthread_getspecific(key);
+
+	// Create and populate a second, destructor-less key to make sure it
+	// does not disturb the first key's value at thread exit.
+	pthread_key_t key2;
+	res = pthread_key_create(&key2, NULL);
+	assert(res == 0);
+	res = pthread_setspecific(key2, (void *)0x55555555);
+	assert(res == 0);
+
+	return value;
+}
+
+int main(int argc, char *argv[])
+{
+	int res;
+	pthread_key_t key;
+
+	res = pthread_key_create(&key, destructor);
+	assert(res == 0);
+	printf("key = %ld\n", key);
+
+	pthread_t p = NULL;
+	res = pthread_create(&p, NULL, thread, &key);
+	assert(res == 0);
+
+	void *value = NULL;
+	res = pthread_join(p, &value);
+	assert(res == 0);
+	printf("value = %p\n", value);
+	printf("ptr = %p\n", ptr);
+
+	assert(ptr == value);
+
+	res = pthread_key_delete(key);
+	assert(res == 0);
+
+	return 0;
+}
diff --git a/tests/wq_block_handoff.c b/tests/wq_block_handoff.c
new file mode 100644
index 0000000..d19283b
--- /dev/null
+++ b/tests/wq_block_handoff.c
@@ -0,0 +1,60 @@
+#include <dispatch/dispatch.h>
+#include <stdio.h>
+#include <sys/sysctl.h>
+
+static int x = 0;
+static int y = 0;
+
+int main(void)
+{
+	/* found in 12A216: Spotlight takes a long time to show results */
+
+	/* we need to start up NCPU-1 threads in a given bucket, then fire up one more at a separate
+	 * priority.
+	 *
+	 * each of these waiters needs to be non-blocked until the point where dispatch wants to
+	 * request a new thread.
+	 *
+	 * if dispatch ever fixes sync_barrier -> sync handoff to not require an extra thread,
+	 * then this test will never fail and will be invalid.
+	 */
+
+	printf("[TEST] barrier_sync -> async @ ncpu threads\n");
+
+	dispatch_semaphore_t sema = dispatch_semaphore_create(0);
+
+	int ncpu = 1;
+	size_t sz = sizeof(ncpu);
+	sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0);
+	printf("starting up %d waiters.\n", ncpu);
+
+	dispatch_queue_t q = dispatch_queue_create("moo", DISPATCH_QUEUE_CONCURRENT);
+	dispatch_barrier_sync(q, ^{
+		dispatch_async(q, ^{
+			printf("async.\n");
+			dispatch_semaphore_signal(sema);
+		});
+		for (int i = 0; i < ncpu - 1; i++) {
+			/* reconstructed: the body of these workers was lost in
+			 * transcription; per the comment above, the intent is
+			 * NCPU-1 busy, non-blocking waiters */
+			dispatch_async(q, ^{
+				for (;;) { x = y + 1; y = x + 1; }
+			});
+		}
+	});
+
+	/* reconstructed: wait for the async block to run; timeout length assumed */
+	long rv = dispatch_semaphore_wait(sema, dispatch_time(DISPATCH_TIME_NOW, 10ull * NSEC_PER_SEC));
+	printf("[%s] barrier_sync -> async completed\n", rv == 0 ? "PASS" : "FAIL");
+
+	return rv;
+}
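The wq_* tests that follow all drive the kernel's thread-request machinery through the private kevent workqueue entry points. Their common setup, reduced to a hedged sketch (callback shapes as the tests use them; the authoritative typedefs live in private/workqueue_private.h):

#include <stdio.h>
#include <pthread.h>
#include <pthread/qos.h>
#include "../private/workqueue_private.h"
#include "../private/qos_private.h"

// Plain work-item callback: receives the priority the thread came up at.
static void wq_func(pthread_priority_t priority){
	fprintf(stderr, "workqueue_func at %lx\n", (unsigned long)priority);
}

// Direct kevent-delivery callback: receives an event buffer and count.
static void wq_kevent_func(void **buf, int *count){
	// TSD slot 4 holds the new thread's pthread_priority_t (as the tests assume).
	pthread_priority_t p = (pthread_priority_t)pthread_getspecific(4);
	fprintf(stderr, "kevent thread at %lx, %d event(s)\n", (unsigned long)p, *count);
}

int main(void){
	// Register both callbacks; the trailing 0, 0 mirror the tests' arguments.
	if (_pthread_workqueue_init_with_kevent(wq_func, wq_kevent_func, 0, 0)) return 1;

	// Optionally reprioritize the event manager thread before requesting it.
	_pthread_workqueue_set_event_manager_priority(
		_pthread_qos_class_encode(QOS_CLASS_USER_INITIATED, 0, 0));
	return 0;
}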
"PASS" : "FAIL"); + + return rv; +} diff --git a/tests/wq_event_manager.c b/tests/wq_event_manager.c new file mode 100644 index 0000000..bed9faa --- /dev/null +++ b/tests/wq_event_manager.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../private/workqueue_private.h" +#include "../private/qos_private.h" + +#include "wq_kevent.h" + +static dispatch_semaphore_t sema; +static dispatch_time_t timeout; + +static int do_wait(int threads){ + for (int i = 0; i < threads; i++){ + int ret = dispatch_semaphore_wait(sema, timeout); + if (ret){ + fprintf(stderr, "timout waiting for thread %d.\n", i); + return 1; + } + } + fprintf(stderr, "\tsuccessfully signaled by %d threads.\n", threads); + return 0; +} + +static void workqueue_func(pthread_priority_t priority){ + fprintf(stderr, "WARNING: workqueue_func called.\n"); + dispatch_semaphore_signal(sema); +} + +void (^cb)(void) = NULL; +static void workqueue_func_kevent(void **buf, int *count){ + pthread_priority_t p = (pthread_priority_t)pthread_getspecific(4); + fprintf(stderr, "\tthread with qos %s spawned.\n", describe_pri(p)); + + if (cb){ + cb(); + } + + dispatch_semaphore_signal(sema); +} + +int main(int argc, char *argv[]){ + int ret = 0; + int exit_status = 0; + + ret = _pthread_workqueue_init_with_kevent(workqueue_func, workqueue_func_kevent, 0, 0); + assert(ret == 0); + + sema = dispatch_semaphore_create(0); + assert(sema != NULL); + timeout = dispatch_time(DISPATCH_TIME_NOW, 5LL * NSEC_PER_SEC); + + // one event manager + requests[0].priority = _pthread_qos_class_encode(QOS_CLASS_UNSPECIFIED, 0, _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG); + requests[0].count = 1; + + if ((ret = do_req()) < 0) return ret; + if ((ret = do_wait(1)) < 0) return ret; + + fprintf(stderr, "event_manager_priority -> USER_INITIATED\n"); + _pthread_workqueue_set_event_manager_priority(_pthread_qos_class_encode(QOS_CLASS_USER_INITIATED, 0, 0)); + + if ((ret = do_req()) < 0) return ret; + if ((ret = do_wait(1)) < 0) return ret; + + fprintf(stderr, "event_manager_priority -> UTILITY\n"); + _pthread_workqueue_set_event_manager_priority(_pthread_qos_class_encode(QOS_CLASS_UTILITY, 0, 0)); + + if ((ret = do_req()) < 0) return ret; + if ((ret = do_wait(1)) < 0) return ret; + + fprintf(stderr, "event_manager_priority -> 60\n"); + _pthread_workqueue_set_event_manager_priority(_PTHREAD_PRIORITY_SCHED_PRI_FLAG | 60);; + + cb = ^(void){sleep(2);}; + + if ((ret = do_req()) < 0) return ret; + if ((ret = do_wait(1)) < 0) return ret; + + requests[0].priority = _pthread_qos_class_encode(QOS_CLASS_USER_INITIATED, 0, 0); + requests[0].count = 1; + + if ((ret = do_req()) < 0) return ret; + if ((ret = do_wait(1)) < 0) return ret; + + return 0; +} diff --git a/tests/wq_kevent.c b/tests/wq_kevent.c new file mode 100644 index 0000000..808cc25 --- /dev/null +++ b/tests/wq_kevent.c @@ -0,0 +1,108 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../private/workqueue_private.h" +#include "../private/qos_private.h" + +#include "wq_kevent.h" + +static dispatch_semaphore_t sema; +static dispatch_time_t timeout; + +static int do_wait(int threads){ + for (int i = 0; i < threads; i++){ + int ret = dispatch_semaphore_wait(sema, timeout); + if (ret){ + fprintf(stderr, "timout waiting for thread %d.\n", i); + return 1; + } + } + fprintf(stderr, "\tsuccessfully signaled by %d threads.\n", threads); + return 0; +} + +static void 
workqueue_func(pthread_priority_t priority){ + fprintf(stderr, "WARNING: workqueue_func called.\n"); + dispatch_semaphore_signal(sema); +} + +void (^cb)(void) = NULL; +static void workqueue_func_kevent(void **buf, int *count){ + pthread_priority_t p = (pthread_priority_t)pthread_getspecific(4); + fprintf(stderr, "\tthread with qos %s spawned.\n", describe_pri(p)); + + if (cb){ + cb(); + } + + dispatch_semaphore_signal(sema); +} + +int main(int argc, char *argv[]){ + int ret = 0; + int exit_status = 0; + + ret = _pthread_workqueue_init_with_kevent(workqueue_func, workqueue_func_kevent, 0, 0); + assert(ret == 0); + + sema = dispatch_semaphore_create(0); + assert(sema != NULL); + timeout = dispatch_time(DISPATCH_TIME_NOW, 5LL * NSEC_PER_SEC); + + _pthread_workqueue_set_event_manager_priority(_pthread_qos_class_encode(QOS_CLASS_UTILITY,0,0)); + + // one constrained thread + requests[0].priority = _pthread_qos_class_encode(QOS_CLASS_USER_INTERACTIVE, 0, 0); + requests[0].count = 1; + + if ((ret = do_req()) < 0) return ret; + if ((ret = do_wait(1)) < 0) return ret; + + // one overcommit thread + requests[0].priority = _pthread_qos_class_encode(QOS_CLASS_USER_INTERACTIVE, 0, _PTHREAD_PRIORITY_OVERCOMMIT_FLAG); + requests[0].count = 1; + + if ((ret = do_req()) < 0) return ret; + if ((ret = do_wait(1)) < 0) return ret; + + // one event manager + requests[0].priority = _pthread_qos_class_encode(QOS_CLASS_USER_INTERACTIVE, 0, _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG); + requests[0].count = 1; + + if ((ret = do_req()) < 0) return ret; + if ((ret = do_wait(1)) < 0) return ret; + + // one constrained thread + requests[0].priority = _pthread_qos_class_encode(QOS_CLASS_USER_INTERACTIVE, 0, 0); + requests[0].count = 1; + + if ((ret = do_req()) < 0) return ret; + if ((ret = do_wait(1)) < 0) return ret; + + // whole bunch of constrained threads + + requests[0].priority = _pthread_qos_class_encode(QOS_CLASS_USER_INTERACTIVE, 0, 0); + requests[0].count = 1; + + cb = ^{ + // burn some CPU + for (int i = 0; i < 1000000; i++){ + char c[32]; + sprintf(c, "%d", i); + } + }; + for (int i = 0; i < 8; i++) + if ((ret = do_req()) < 0) return ret; + if ((ret = do_wait(8)) < 0) return ret; + + return 0; +} diff --git a/tests/wq_kevent.h b/tests/wq_kevent.h new file mode 100644 index 0000000..198e31d --- /dev/null +++ b/tests/wq_kevent.h @@ -0,0 +1,59 @@ +#define REQUESTS_LEN 4 +static struct workq_reqthreads_req_s {unsigned long priority; int count;} requests[REQUESTS_LEN]; + +#define QOS_STR(q) (q == QOS_CLASS_USER_INTERACTIVE ? "UInter" : (q == QOS_CLASS_USER_INITIATED ? "UInit" : (q == QOS_CLASS_DEFAULT ? "Dflt" : (q == QOS_CLASS_UTILITY ? "Util" : (q == QOS_CLASS_BACKGROUND ? 
"BG" : "Unkn" ) ) ) ) ) +static char* describe_pri(pthread_priority_t pri){ + qos_class_t qos; + unsigned long flags; + + qos = _pthread_qos_class_decode(pri, NULL, &flags); + + static char desc[32]; + if (flags & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG){ + sprintf(desc, "E:%s", QOS_STR(qos)); + } else if (flags & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG){ + sprintf(desc, "O:%s", QOS_STR(qos)); + } else { + sprintf(desc, "%s", QOS_STR(qos)); + } + + return desc; +} + +static char* describe_req(void){ + static char desc[256]; + char *c = desc; + + *c++ = '['; + for (int i = 0; i < REQUESTS_LEN; i++){ + if (i) *c++ = ','; + c += sprintf(c, "{%s,%d}", describe_pri(requests[i].priority), requests[i].count); + } + *c++ = ']'; + *c++ = '\0'; + + return desc; +} + +static char *dummy_text = "Four score and seven years ago our fathers brought forth on this continent a new nation, conceived in liberty, and dedicated to the"; +// takes about 1us on my machine +static void burn_cpu(void){ + char key[64]; char txt[64]; + strncpy(txt, dummy_text, 64); + for (int i = 0; i < 64; i++) + key[i] = rand() % 1; + setkey(key); + encrypt(txt, 0); + encrypt(txt, 1); +} + +static int do_req(void){ + int ret = sysctlbyname("debug.wq_kevent_test", NULL, NULL, requests, sizeof(requests)); + if (ret >= 0){ + fprintf(stderr, "wq_kevent_test(%s) -> %d\n", describe_req(), ret); + } else { + perror("debug.wk_kevent_test"); + return errno; + } + return ret; +} diff --git a/tests/wq_kevent_stress.c b/tests/wq_kevent_stress.c new file mode 100644 index 0000000..7d47def --- /dev/null +++ b/tests/wq_kevent_stress.c @@ -0,0 +1,111 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../private/workqueue_private.h" +#include "../private/qos_private.h" + +#include "wq_kevent.h" + +static int rand_qos(){ + switch (rand() % 5){ + case 0: return QOS_CLASS_USER_INTERACTIVE; + case 1: return QOS_CLASS_USER_INITIATED; + case 2: return QOS_CLASS_DEFAULT; + case 3: return QOS_CLASS_UTILITY; + case 4: return QOS_CLASS_BACKGROUND; + } + return QOS_CLASS_UNSPECIFIED; +} + +static void workqueue_func(pthread_priority_t priority){ + fprintf(stderr, "WARNING: workqueue_func called.\n"); +} + +static void workqueue_func_kevent(void **buf, int *count){ + pthread_priority_t p = (pthread_priority_t)pthread_getspecific(4); + fprintf(stderr, "\tthread with qos %s spawned (count: %d).\n", describe_pri(p), *count); + + //struct timeval start, stop; + //gettimeofday(&start, NULL); + + for (int i = 0; i < (rand() % 10000) * 1000 + 50000; i++){ + burn_cpu(); + } + + //gettimeofday(&stop, NULL); + //fprintf(stderr, "\tthread exited %ld usec later\n", stop.tv_usec - start.tv_usec + (stop.tv_sec - start.tv_sec) * 1000000); +} + +int main(int argc, char *argv[]){ + int ret = 0; + + ret = _pthread_workqueue_init_with_kevent(workqueue_func, workqueue_func_kevent, 0, 0); + assert(ret == 0); + + int iteration = 0; + while (iteration++ < 1000){ + switch (iteration % 5){ + case 0: + // one event manager + bzero(requests, sizeof(requests)); + requests[0].priority = _pthread_qos_class_encode(rand_qos(), 0, _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG); + requests[0].count = 1; + + if ((ret = do_req()) < 0) return ret; + break; + + case 1: + // one constrained thread + bzero(requests, sizeof(requests)); + requests[0].priority = _pthread_qos_class_encode(rand_qos(), 0, 0); + requests[0].count = rand() % 2; + if (requests[0].count > 0 && (ret = do_req()) < 0) return ret; + break; + + case 2: + // one event 
diff --git a/tests/wq_kevent_stress.c b/tests/wq_kevent_stress.c
new file mode 100644
index 0000000..7d47def
--- /dev/null
+++ b/tests/wq_kevent_stress.c
@@ -0,0 +1,111 @@
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+#include <sys/sysctl.h>
+#include <sys/time.h>
+
+#include <dispatch/dispatch.h>
+
+#include "../private/workqueue_private.h"
+#include "../private/qos_private.h"
+
+#include "wq_kevent.h"
+
+static int rand_qos(){
+	switch (rand() % 5){
+		case 0: return QOS_CLASS_USER_INTERACTIVE;
+		case 1: return QOS_CLASS_USER_INITIATED;
+		case 2: return QOS_CLASS_DEFAULT;
+		case 3: return QOS_CLASS_UTILITY;
+		case 4: return QOS_CLASS_BACKGROUND;
+	}
+	return QOS_CLASS_UNSPECIFIED;
+}
+
+static void workqueue_func(pthread_priority_t priority){
+	fprintf(stderr, "WARNING: workqueue_func called.\n");
+}
+
+static void workqueue_func_kevent(void **buf, int *count){
+	pthread_priority_t p = (pthread_priority_t)pthread_getspecific(4);
+	fprintf(stderr, "\tthread with qos %s spawned (count: %d).\n", describe_pri(p), *count);
+
+	//struct timeval start, stop;
+	//gettimeofday(&start, NULL);
+
+	for (int i = 0; i < (rand() % 10000) * 1000 + 50000; i++){
+		burn_cpu();
+	}
+
+	//gettimeofday(&stop, NULL);
+	//fprintf(stderr, "\tthread exited %ld usec later\n", stop.tv_usec - start.tv_usec + (stop.tv_sec - start.tv_sec) * 1000000);
+}
+
+int main(int argc, char *argv[]){
+	int ret = 0;
+
+	ret = _pthread_workqueue_init_with_kevent(workqueue_func, workqueue_func_kevent, 0, 0);
+	assert(ret == 0);
+
+	int iteration = 0;
+	while (iteration++ < 1000){
+		switch (iteration % 5){
+			case 0:
+				// one event manager
+				bzero(requests, sizeof(requests));
+				requests[0].priority = _pthread_qos_class_encode(rand_qos(), 0, _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG);
+				requests[0].count = 1;
+
+				if ((ret = do_req()) < 0) return ret;
+				break;
+
+			case 1:
+				// one constrained thread
+				bzero(requests, sizeof(requests));
+				requests[0].priority = _pthread_qos_class_encode(rand_qos(), 0, 0);
+				requests[0].count = rand() % 2;
+				if (requests[0].count > 0 && (ret = do_req()) < 0) return ret;
+				break;
+
+			case 2:
+				// one event manager
+				bzero(requests, sizeof(requests));
+				requests[0].priority = _pthread_qos_class_encode(rand_qos(), 0, _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG);
+				requests[0].count = 1;
+
+				if ((ret = do_req()) < 0) return ret;
+				break;
+
+			case 3:
+				// one overcommit thread
+				bzero(requests, sizeof(requests));
+				requests[0].priority = _pthread_qos_class_encode(rand_qos(), 0, _PTHREAD_PRIORITY_OVERCOMMIT_FLAG);
+				requests[0].count = rand() % 2;
+
+				if (requests[0].count > 0 && (ret = do_req()) < 0) return ret;
+				break;
+
+			case 4:
+				// varied constrained threads
+				bzero(requests, sizeof(requests));
+				requests[0].priority = _pthread_qos_class_encode(QOS_CLASS_USER_INTERACTIVE, 0, 0);
+				requests[0].count = rand() % 4;
+				requests[1].priority = _pthread_qos_class_encode(QOS_CLASS_USER_INITIATED, 0, 0);
+				requests[1].count = rand() % 4;
+				requests[2].priority = _pthread_qos_class_encode(QOS_CLASS_UTILITY, 0, 0);
+				requests[2].count = rand() % 4;
+				requests[3].priority = _pthread_qos_class_encode(QOS_CLASS_BACKGROUND, 0, 0);
+				requests[3].count = rand() % 4;
+				if ((requests[0].count + requests[1].count + requests[2].count + requests[3].count) > 0 && (ret = do_req()) < 0) return ret;
+				break;
+		}
+		usleep(rand() % 100000);
+	}
+
+	return 0;
+}
diff --git a/tools/wqtrace.lua b/tools/wqtrace.lua
new file mode 100755
index 0000000..b68299c
--- /dev/null
+++ b/tools/wqtrace.lua
@@ -0,0 +1,279 @@
+#!/usr/local/bin/luatrace -s
+
+trace_codename = function(codename, callback)
+	local debugid = trace.debugid(codename)
+	if debugid ~= 0 then
+		trace.single(debugid, callback)
+	end
+end
+
+initial_timestamp = 0
+get_prefix = function(buf)
+	if initial_timestamp == 0 then
+		initial_timestamp = buf.timestamp
+	end
+	local prefix
+	if trace.debugid_is_start(buf.debugid) then
+		prefix = "→"
+	elseif trace.debugid_is_end(buf.debugid) then
+		prefix = "←"
+	else
+		prefix = "↔"
+	end
+	local secs = (buf.timestamp - initial_timestamp) / 1000 / 1000000
+	local usecs = (buf.timestamp - initial_timestamp) / 1000 % 1000000
+	return string.format("%s %6d.%06d %-16s[%06x] %-24s",
+		prefix, secs, usecs, buf.command, buf.threadid, buf.debugname)
+end
+
+parse_pthread_priority = function(pri)
+	local qos = bit32.rshift(bit32.band(pri, 0x00ffff00), 8)
+	if qos == 0x20 then
+		return "UInter"
+	elseif qos == 0x10 then
+		return "UInit"
+	elseif qos == 0x08 then
+		return "Dflt"
+	elseif qos == 0x04 then
+		return "Util"
+	elseif qos == 0x02 then
+		return "BG"
+	elseif qos == 0x01 then
+		return "Maint"
+	elseif qos == 0x00 then
+		return "Unsp"
+	else
+		return "Unkn"
+	end
+end
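+
+-- pthread_priority_t layout, as decoded above: the QoS class is a one-hot
+-- value in bits 8-23 (mask 0x00ffff00): 0x20 user-interactive, 0x10
+-- user-initiated, 0x08 default, 0x04 utility, 0x02 background, 0x01
+-- maintenance. The top byte carries flags (overcommit, event manager,
+-- sched-pri) and the low byte the relative priority.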
printf("%s\trecording %d constrained request(s) at %s, total %d requests\n", + prefix, buf.arg4, parse_pthread_priority(buf.arg2), buf.arg3) +end) + +trace_codename("wq_req_octhreads", function(buf) + local prefix = get_prefix(buf) + printf("%s\tattempting %d overcommit request(s) at %s, total %d requests\n", + prefix, buf.arg4, parse_pthread_priority(buf.arg2), buf.arg3) +end) +trace_codename("wq_delay_octhreads", function(buf) + local prefix = get_prefix(buf) + printf("%s\trecording %d delayed overcommit request(s) at %s, total %d requests\n", + prefix, buf.arg4, parse_pthread_priority(buf.arg2), buf.arg3) +end) + +trace_codename("wq_req_kevent_threads", function(buf) + local prefix = get_prefix(buf) + printf("%s\trecording kevent constrained request at %s, total %d requests\n", + prefix, parse_pthread_priority(buf.arg2), buf.arg3) +end) +trace_codename("wq_req_kevent_octhreads", function(buf) + local prefix = get_prefix(buf) + printf("%s\trecording kevent overcommit request at %s, total %d requests\n", + prefix, parse_pthread_priority(buf.arg2), buf.arg3) +end) +trace_codename("wq_req_event_manager", function(buf) + local prefix = get_prefix(buf) + printf("%s\trecording event manager request at %s, existing at %d, %d running\n", + prefix, parse_pthread_priority(buf.arg2), buf.arg3, buf.arg4) +end) + +trace_codename("wq_start_add_timer", function(buf) + local prefix = get_prefix(buf) + printf("%s\tarming timer to fire in %d us (flags: %x, reqcount: %d)\n", + prefix, buf.arg4, buf.arg3, buf.arg2) +end) + +trace_codename("wq_add_timer", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tadd_timer fired (flags: %x, nthreads: %d, thidlecount: %d)\n", + prefix, buf.arg2, buf.arg3, buf.arg4) + elseif trace.debugid_is_end(buf.debugid) then + printf("%s\tadd_timer completed (start_timer: %x, nthreads: %d, thidlecount: %d)\n", + prefix, buf.arg2, buf.arg3, buf.arg4) + else + printf("%s\tadd_timer added threads (reqcount: %d, thidlecount: %d, busycount: %d)\n", + prefix, buf.arg2, buf.arg3, buf.arg4) + + end +end) + +trace_codename("wq_overcommitted", function(buf) + local prefix = get_prefix(buf) + if bit32.band(buf.arg2, 0x80) then + printf("%s\tworkqueue overcimmitted @ %s, starting timer (thactive_count: %d, busycount; %d)", + prefix, parse_pthread_priority(buf.arg2), buf.arg3, buf.arg4) + else + printf("%s\tworkqueue overcimmitted @ %s (thactive_count: %d, busycount; %d)", + prefix, parse_pthread_priority(buf.arg2), buf.arg3, buf.arg4) + end +end) + +trace_codename("wq_stalled", function(buf) + local prefix = get_prefix(buf) + printf("%s\tworkqueue stalled (nthreads: %d)\n", prefix, buf.arg3) +end) + +-- thread lifecycle + +trace_codename("wq_run_nextitem", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + if buf.arg2 == 0 then + printf("%s\tthread %d looking for next request (idlecount: %d, reqcount: %d)\n", + prefix, buf.threadid, buf.arg3, buf.arg4) + else + printf("%s\ttrying to run a request on an idle thread (idlecount: %d, reqcount: %d)\n", + prefix, buf.arg3, buf.arg4) + end + else + if buf.arg4 == 1 then + printf("%s\tkicked off work on thread %d (overcommit: %d)\n", prefix, buf.arg2, buf.arg3) + elseif buf.arg4 == 2 then + printf("%s\tno work/threads (start_timer: %d)\n", prefix, buf.arg3) + elseif buf.arg4 == 3 then + printf("%s\tthread parked\n", prefix) + elseif buf.arg4 == 4 then + printf("%s\treturning with new request\n", prefix) + else + printf("%s\tWARNING: UNKNOWN END 
CODE:%d\n", prefix, buf.arg4) + end + end +end) + +trace_codename("wq_runitem", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\trunning an item at %s (flags: %x)\n", prefix, parse_pthread_priority(buf.arg3), buf.arg2) + else + printf("%s\tthread returned\n", prefix) + end +end) + +trace_codename("wq_thread_yielded", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tthread_yielded called (yielded_count: %d, reqcount: %d)\n", + prefix, buf.arg2, buf.arg3) + else + if (buf.arg4 == 1) then + printf("%s\tthread_yielded completed kicking thread (yielded_count: %d, reqcount: %d)\n", + prefix, buf.arg2, buf.arg3) + elseif (buf.arg4 == 2) then + printf("%s\tthread_yielded completed (yielded_count: %d, reqcount: %d)\n", + prefix, buf.arg2, buf.arg3) + else + printf("%s\tthread_yielded completed unusually (yielded_count: %d, reqcount: %d)\n", + prefix, buf.arg2, buf.arg3) + end + end +end) + +trace_codename("wq_thread_block", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tthread blocked (activecount: %d, prioritiy: %d, start_time: %d)\n", + prefix, buf.arg2, buf.arg3, buf.arg3) + else + printf("%s\tthread unblocked (threads_scheduled: %d, priority: %d)\n", + prefix, buf.arg2, buf.arg3) + end +end) + +trace_codename("wq_thread_suspend", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tcreated new suspended thread (nthreads:%d)\n", + prefix, buf.arg2) + else + if buf.arg4 == 0xdead then + printf("%s\tthread exited suspension to die (nthreads: %d)\n", + prefix, buf.arg3) + end + end +end) + +trace_codename("wq_thread_park", function(buf) + local prefix = get_prefix(buf) + if trace.debugid_is_start(buf.debugid) then + printf("%s\tthread parked (threads_scheduled: %d, thidlecount: %d, us_to_wait: %d)\n", + prefix, buf.arg2, buf.arg3, buf.arg4) + else + if buf.arg4 == 0xdead then + printf("%s\tthread exited park to die (nthreads: %d)\n", prefix, buf.arg3) + end + end + +end) + +trace_codename("wq_thread_limit_exceeded", function(buf) + local prefix = get_prefix(buf) + printf("%s\ttotal thread limit exceeded, %d threads, total %d max threads, (kern limit: %d)\n", + prefix, buf.arg2, buf.arg3, buf.arg4) +end) + +trace_codename("wq_thread_constrained_maxed", function(buf) + local prefix = get_prefix(buf) + printf("%s\tattempted to add thread at max constrained limit, total %d threads (limit: %d)\n", + prefix, buf.arg2, buf.arg3) +end) + +trace_codename("wq_thread_add_during_exit", function(buf) + local prefix = get_prefix(buf) + printf("%s\tattempted to add thread during WQ_EXITING\n", prefix) +end) + +trace_codename("wq_thread_create_failed", function(buf) + local prefix = get_prefix(buf) + if buf.arg3 == 0 then + printf("%s\tfailed to create new workqueue thread, kern_return: 0x%x\n", + prefix, buf.arg2) + elseif buf.arg3 == 1 then + printf("%s\tfailed to vm_map workq thread stack: 0x%x", prefix, buf.arg2) + elseif buf.arg3 == 2 then + printf("%s\tfailed to vm_protect workq thread guardsize: 0x%x", prefix, buf.arg2) + end +end) + + +-- The trace codes we need aren't enabled by default +darwin.sysctlbyname("kern.pthread_debug_tracing", 1) +completion_handler = function() + darwin.sysctlbyname("kern.pthread_debug_tracing", 0) +end +trace.set_completion_handler(completion_handler) + diff --git a/xcodescripts/install-manpages.sh b/xcodescripts/install-manpages.sh index 
237872e..97ddb18 100644 --- a/xcodescripts/install-manpages.sh +++ b/xcodescripts/install-manpages.sh @@ -66,6 +66,7 @@ BASE_PAGES="pthread.3 \ pthread_join.3 \ pthread_key_create.3 \ pthread_key_delete.3 \ + pthread_main_np.3 \ pthread_mutex_destroy.3 \ pthread_mutex_init.3 \ pthread_mutex_lock.3 \ diff --git a/xcodescripts/install-symlinks.sh b/xcodescripts/install-symlinks.sh index 5c20007..cc4fb2c 100644 --- a/xcodescripts/install-symlinks.sh +++ b/xcodescripts/install-symlinks.sh @@ -22,7 +22,6 @@ # if [ "$ACTION" = build ]; then exit 0; fi -DSTROOT="$DSTROOT$INSTALL_PATH_PREFIX" # # Symlink old header locations. diff --git a/xcodescripts/install-sys-headers.sh b/xcodescripts/install-sys-headers.sh index 0e3f6b3..ca631d1 100644 --- a/xcodescripts/install-sys-headers.sh +++ b/xcodescripts/install-sys-headers.sh @@ -24,7 +24,6 @@ set -e if [ "$ACTION" = build ]; then exit 0; fi -DSTROOT="$DSTROOT$INSTALL_PATH_PREFIX" DESTDIR="$DSTROOT/usr/include/sys" mkdir -p "$DESTDIR" diff --git a/xcodescripts/kext.xcconfig b/xcodescripts/kext.xcconfig index dd41529..4aa260e 100644 --- a/xcodescripts/kext.xcconfig +++ b/xcodescripts/kext.xcconfig @@ -1,7 +1,7 @@ // pthread kext build options ARCHS = $(ARCHS_STANDARD_32_64_BIT) -SUPPORTED_PLATFORMS = macosx iphoneos +SUPPORTED_PLATFORMS = macosx iphoneos appletvos watchos DYLIB_CURRENT_VERSION = $(RC_ProjectSourceVersion) INSTALL_PATH = $(SYSTEM_LIBRARY_DIR)/Extensions MODULE_NAME = com.apple.kec.pthread @@ -19,8 +19,8 @@ GCC_C_LANGUAGE_STANDARD = gnu99 CLANG_CXX_LANGUAGE_STANDARD = gnu++0x CLANG_CXX_LIBRARY = libc++ GCC_PRECOMPILE_PREFIX_HEADER = YES -GCC_PREPROCESSOR_DEFINITIONS = XNU_KERNEL_PRIVATE MACH_KERNEL_PRIVATE ABSOLUTETIME_SCALAR_TYPE NEEDS_SCHED_CALL_T -//GCC_OPTIMIZATION_LEVEL = 0 +GCC_PREPROCESSOR_DEFINITIONS_kext = XNU_KERNEL_PRIVATE MACH_KERNEL_PRIVATE ABSOLUTETIME_SCALAR_TYPE NEEDS_SCHED_CALL_T +GCC_PREPROCESSOR_DEFINITIONS = $(GCC_PREPROCESSOR_DEFINITIONS_kext) GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES GCC_TREAT_INCOMPATIBLE_POINTER_TYPE_WARNINGS_AS_ERRORS = YES diff --git a/xcodescripts/kext_debug.xcconfig b/xcodescripts/kext_debug.xcconfig new file mode 100644 index 0000000..7098463 --- /dev/null +++ b/xcodescripts/kext_debug.xcconfig @@ -0,0 +1,4 @@ +#include "kext.xcconfig" + +GCC_OPTIMIZATION_LEVEL = 0 +GCC_PREPROCESSOR_DEFINITIONS = $(GCC_PREPROCESSOR_DEFINITIONS_kext) MACH_ASSERT DEBUG \ No newline at end of file diff --git a/xcodescripts/pthread.xcconfig b/xcodescripts/pthread.xcconfig index 7c2c274..b1bbdfd 100644 --- a/xcodescripts/pthread.xcconfig +++ b/xcodescripts/pthread.xcconfig @@ -1,17 +1,14 @@ #include "/Makefiles/CoreOS/Xcode/BSD.xcconfig" -#include "/AppleInternal/XcodeConfig/SimulatorSupport.xcconfig" - -// Set INSTALL_PATH[sdk=macosx*] when SimulatorSupport.xcconfig is unavailable -INSTALL_PATH[sdk=macosx*] = $(INSTALL_PATH_ACTUAL) // Standard settings -SUPPORTED_PLATFORMS = macosx iphoneos iphonesimulator -SRCROOT_SEARCH_PATHS = $(SRCROOT) $(SRCROOT)/private +SDKROOT = macosx.internal +SUPPORTED_PLATFORMS = macosx iphoneos iphonesimulator appletvos appletvsimulator watchos watchsimulator +SRCROOT_SEARCH_PATHS = $(SRCROOT) $(SRCROOT)/private $(SRCROOT)/os SYSTEM_FRAMEWORK_HEADERS = $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders -HEADER_SEARCH_PATHS = $($(TARGET_NAME)_SEARCH_PATHS) $(SRCROOT_SEARCH_PATHS) $(SYSTEM_FRAMEWORK_HEADERS) $(SDKROOT)/usr/local/include $(inherited) +HEADER_SEARCH_PATHS = $($(PRODUCT_NAME)_SEARCH_PATHS) $(SRCROOT_SEARCH_PATHS) 
$(SYSTEM_FRAMEWORK_HEADERS) $(SDKROOT)/usr/local/include $(inherited) ALWAYS_SEARCH_USER_PATHS = YES USE_HEADERMAP = NO -BUILD_VARIANTS = normal +BUILD_VARIANTS = normal debug GCC_OPTIMIZATION_LEVEL = s GCC_C_LANGUAGE_STANDARD = gnu99 @@ -39,19 +36,20 @@ DYLIB_CURRENT_VERSION = $(RC_ProjectSourceVersion) DYLIB_COMPATIBILITY_VERSION = 1 // Installation paths -INSTALL_PATH_ACTUAL = /usr/lib/system -PUBLIC_HEADERS_FOLDER_PATH = $(INSTALL_PATH_PREFIX)/usr/include/pthread -PRIVATE_HEADERS_FOLDER_PATH = $(INSTALL_PATH_PREFIX)/usr/local/include/pthread +INSTALL_PATH = /usr/lib/system +PUBLIC_HEADERS_FOLDER_PATH = /usr/include/pthread +PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include/pthread SKIP_INSTALL = NO // Base definitions // TODO: Remove -fstack-protector on _debug when it is moved to libplatform LINK_WITH_STANDARD_LIBRARIES = NO BASE_PREPROCESSOR_MACROS = __LIBC__ __DARWIN_UNIX03=1 __DARWIN_64_BIT_INO_T=1 __DARWIN_NON_CANCELABLE=1 __DARWIN_VERS_1050=1 _FORTIFY_SOURCE=0 __PTHREAD_BUILDING_PTHREAD__=1 $(SIM_PREPROCESSOR_MACROS) -OTHER_CFLAGS = -fno-stack-protector -fdollars-in-identifiers -fno-common -fno-builtin -momit-leaf-frame-pointer $($(TARGET_NAME)_CFLAGS) +GCC_PREPROCESSOR_DEFINITIONS = $(BASE_PREPROCESSOR_MACROS) + +OTHER_CFLAGS = -fno-stack-protector -fdollars-in-identifiers -fno-common -fno-builtin -momit-leaf-frame-pointer $($(PRODUCT_NAME)_CFLAGS) OTHER_CFLAGS_debug = -fno-stack-protector -fno-inline -O0 -DDEBUG=1 OTHER_LDFLAGS = -Wl,-alias_list,$(SRCROOT)/xcodescripts/pthread.aliases -Wl,-umbrella,System -L/usr/lib/system -lsystem_kernel -lsystem_platform -ldyld -lcompiler_rt $(UPLINK_LDFLAGS) $(CR_LDFLAGS) -GCC_PREPROCESSOR_DEFINITIONS = $(BASE_PREPROCESSOR_MACROS) // CrashReporter CR_LDFLAGS = -lCrashReporterClient @@ -60,6 +58,6 @@ ORDER_FILE = $(SDKROOT)/$(APPLE_INTERNAL_DIR)/OrderFiles/libsystem_pthread.order ORDER_FILE[sdk=iphonesimulator*] = // Simulator build rules -EXCLUDED_SOURCE_FILE_NAMES[sdk=iphonesimulator*] = *.c *.d *.s +EXCLUDED_SOURCE_FILE_NAMES[sdk=iphonesimulator*] = *.c *.s SKIP_INSTALL[sdk=iphonesimulator*] = YES OTHER_LDFLAGS[sdk=iphonesimulator*] = diff --git a/xcodescripts/pthread_debug.xcconfig b/xcodescripts/pthread_debug.xcconfig new file mode 100644 index 0000000..e86b9b0 --- /dev/null +++ b/xcodescripts/pthread_debug.xcconfig @@ -0,0 +1,4 @@ +#include "pthread.xcconfig" + +BUILD_VARIANTS = normal debug +OTHER_CFLAGS = $(OTHER_CFLAGS_debug) diff --git a/xcodescripts/pthread_introspection.xcconfig b/xcodescripts/pthread_introspection.xcconfig new file mode 100644 index 0000000..d849c60 --- /dev/null +++ b/xcodescripts/pthread_introspection.xcconfig @@ -0,0 +1,8 @@ +#include "pthread.xcconfig" + +BUILD_VARIANTS = normal +INSTALL_PATH = /usr/lib/system/introspection + +GCC_PREPROCESSOR_DEFINITIONS = $(BASE_PREPROCESSOR_MACROS) PLOCKSTAT=1 +CONFIGURATION_BUILD_DIR = $(BUILD_DIR)/introspection +system_pthread_CFLAGS = -mno-omit-leaf-frame-pointer -- 2.45.2