X-Git-Url: https://git.saurik.com/apple/libpthread.git/blobdiff_plain/f1a1da6cf65a9d0e6858678f6c259025cf5d27fd..a03d92013c2a24ebc4e02e5d1f575787f8467f6b:/src/pthread.c?ds=sidebyside

diff --git a/src/pthread.c b/src/pthread.c
index de2d6db..1377429 100644
--- a/src/pthread.c
+++ b/src/pthread.c
@@ -54,6 +54,7 @@
 #include "workqueue_private.h"
 #include "introspection_private.h"
 #include "qos_private.h"
+#include "tsd_private.h"
 
 #include <stdlib.h>
 #include <errno.h>
@@ -96,7 +97,7 @@ int __unix_conforming = 0;
 // _pthread_list_lock protects _pthread_count, access to the __pthread_head
 // list, and the parentcheck, childrun and childexit flags of the pthread
 // structure. Externally imported by pthread_cancelable.c.
-__private_extern__ pthread_lock_t _pthread_list_lock = LOCK_INITIALIZER;
+__private_extern__ _pthread_lock _pthread_list_lock = _PTHREAD_LOCK_INITIALIZER;
 __private_extern__ struct __pthread_list __pthread_head = TAILQ_HEAD_INITIALIZER(__pthread_head);
 static int _pthread_count = 1;
 
@@ -122,9 +123,20 @@ typedef struct _pthread_reap_msg_t {
 	mach_msg_trailer_t trailer;
 } pthread_reap_msg_t;
 
-#define pthreadsize ((size_t)mach_vm_round_page(sizeof(struct _pthread)))
+/*
+ * The pthread may be offset into a page.  In that event, by contract
+ * with the kernel, the allocation will extend PTHREAD_SIZE from the
+ * start of the next page.  There's also one page worth of allocation
+ * below stacksize for the guard page. <rdar://problem/19941744>
+ */
+#define PTHREAD_SIZE ((size_t)mach_vm_round_page(sizeof(struct _pthread))) // sizeof(struct _pthread), page-rounded
+#define PTHREAD_ALLOCADDR(stackaddr, stacksize) (((stackaddr) - (stacksize)) - vm_page_size) // one page below the stack's low end
+#define PTHREAD_ALLOCSIZE(stackaddr, stacksize) ((round_page((uintptr_t)(stackaddr)) + PTHREAD_SIZE) - (uintptr_t)PTHREAD_ALLOCADDR(stackaddr, stacksize)) // NB: expands each argument twice
+
 static pthread_attr_t _pthread_attr_default = {0};
-static struct _pthread _thread = {0};
+
+// The main thread's pthread_t
+static struct _pthread _thread __attribute__((aligned(64))) = {0};
 
 static int default_priority;
 static int max_priority;
@@ -133,6 +145,7 @@ static int pthread_concurrency;
 
 // work queue support data
 static void (*__libdispatch_workerfunction)(pthread_priority_t) = NULL;
+static void (*__libdispatch_keventfunction)(void **events, int *nevents) = NULL;
 static int __libdispatch_offset;
 
 // supported feature set
@@ -152,13 +165,15 @@ static void _pthread_struct_init(pthread_t t,
 	const pthread_attr_t *attrs,
 	void *stack,
 	size_t stacksize,
-	int kernalloc);
+	void *freeaddr,
+	size_t freesize);
 
 extern void _pthread_set_self(pthread_t);
+static void _pthread_set_self_internal(pthread_t, bool needs_tsd_base_set);
 
 static void _pthread_dealloc_reply_port(pthread_t t);
 
-static inline void __pthread_add_thread(pthread_t t, bool parent);
+static inline void __pthread_add_thread(pthread_t t, bool parent, bool from_mach_thread);
 static inline int __pthread_remove_thread(pthread_t t, bool child, bool *should_exit);
 
 static int _pthread_find_thread(pthread_t thread);
@@ -171,8 +186,8 @@ static inline void _pthread_introspection_thread_start(pthread_t t);
 static inline void _pthread_introspection_thread_terminate(pthread_t t, void *freeaddr, size_t freesize, bool destroy);
 static inline void _pthread_introspection_thread_destroy(pthread_t t);
 
-extern void start_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unused, int reuse);
-extern void thread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
+extern void start_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unused, int reuse); // trampoline into _pthread_wqthread
+extern void thread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags); // trampoline into _pthread_start
 
 void pthread_workqueue_atfork_child(void);
 
@@ -192,11 +207,12 @@ _________________________________________
 -----------------------------------------
 */
 
-#define PTHREAD_START_CUSTOM	0x01000000
-#define PTHREAD_START_SETSCHED	0x02000000
-#define PTHREAD_START_DETACHED	0x04000000
-#define PTHREAD_START_QOSCLASS	0x08000000
-#define PTHREAD_START_QOSCLASS_MASK 0xffffff
+#define PTHREAD_START_CUSTOM		0x01000000
+#define PTHREAD_START_SETSCHED		0x02000000
+#define PTHREAD_START_DETACHED		0x04000000
+#define PTHREAD_START_QOSCLASS		0x08000000
+#define PTHREAD_START_TSD_BASE_SET	0x10000000
+#define PTHREAD_START_QOSCLASS_MASK 0x00ffffff
 #define PTHREAD_START_POLICY_BITSHIFT 16
 #define PTHREAD_START_POLICY_MASK 0xff
 #define PTHREAD_START_IMPORTANCE_MASK 0xffff
@@ -218,10 +234,10 @@ static const mach_vm_address_t PTHREAD_STACK_HINT = 0xB0000000;
 #error no PTHREAD_STACK_HINT for this architecture
 #endif
 
-#ifdef __i386__
-// Check for regression of <rdar://problem/13249323>
-struct rdar_13249323_regression_static_assert { unsigned a[offsetof(struct _pthread, err_no) == 68 ? 1 : -1]; };
-#endif
+// Check that offsets of _PTHREAD_STRUCT_DIRECT_*_OFFSET values haven't changed
+_Static_assert(offsetof(struct _pthread, tsd) + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET
+		== offsetof(struct _pthread, thread_id),
+		"_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET is correct");
 
 // Allocate a thread structure, stack and guard page.
 //
@@ -256,11 +272,11 @@ _pthread_allocate(pthread_t *thread, const pthread_attr_t *attrs, void **stack)
 	if (attrs->stackaddr != NULL) {
 		PTHREAD_ASSERT(((uintptr_t)attrs->stackaddr % vm_page_size) == 0);
 		*stack = attrs->stackaddr;
-		allocsize = pthreadsize;
+		allocsize = PTHREAD_SIZE;
 	} else {
 		guardsize = attrs->guardsize;
 		stacksize = attrs->stacksize;
-		allocsize = stacksize + guardsize + pthreadsize;
+		allocsize = stacksize + guardsize + PTHREAD_SIZE;
 	}
 	
 	kr = mach_vm_map(mach_task_self(),
@@ -300,9 +316,9 @@ _pthread_allocate(pthread_t *thread, const pthread_attr_t *attrs, void **stack)
 	}
 	
 	if (t != NULL) {
-		_pthread_struct_init(t, attrs, *stack, 0, 0);
-		t->freeaddr = (void *)allocaddr;
-		t->freesize = allocsize;
+		_pthread_struct_init(t, attrs,
+				     *stack, attrs->stacksize,
+				     allocaddr, allocsize);
 		*thread = t;
 		res = 0;
 	} else {
@@ -316,46 +332,83 @@ _pthread_deallocate(pthread_t t)
 {
 	// Don't free the main thread.
 	if (t != &_thread) {
-		(void)mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize);
+		kern_return_t ret;
+		ret = mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize);
+		PTHREAD_ASSERT(ret == KERN_SUCCESS);
 	}
 	return 0;
 }
 
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreturn-stack-address"
+
+PTHREAD_NOINLINE
+static void*
+_current_stack_address(void) // returns a location inside this call's own stack frame
+{
+	int a; // never read or written; only its address matters
+	return &a; // dangling once we return: use for address arithmetic only, never dereference
+}
+
+#pragma clang diagnostic pop
+
 // Terminates the thread if called from the currently running thread.
-PTHREAD_NORETURN
+PTHREAD_NORETURN PTHREAD_NOINLINE
 static void
 _pthread_terminate(pthread_t t)
 {
 	PTHREAD_ASSERT(t == pthread_self());
-	
+
 	uintptr_t freeaddr = (uintptr_t)t->freeaddr;
-	size_t freesize = t->freesize - pthreadsize;
+	size_t freesize = t->freesize;
+
+	// the size of just the stack
+	size_t freesize_stack = t->freesize;
+
+	// We usually pass our structure+stack to bsdthread_terminate to free, but
+	// if we get told to keep the pthread_t structure around then we need to
+	// adjust the free size and addr in the pthread_t to just refer to the
+	// structure and not the stack.  If we do end up deallocating the
+	// structure, this is useless work since no one can read the result, but we
+	// can't do it after the call to pthread_remove_thread because it isn't
+	// safe to dereference t after that.
+	if ((void*)t > t->freeaddr && (void*)t < t->freeaddr + t->freesize){
+		// Check to ensure the pthread structure itself is part of the
+		// allocation described by freeaddr/freesize, in which case we split and
+		// only deallocate the area below the pthread structure.  In the event of a
+		// custom stack, the freeaddr/size will be the pthread structure itself, in
+		// which case we shouldn't free anything (the final else case).
+		freesize_stack = trunc_page((uintptr_t)t - (uintptr_t)freeaddr);
+
+		// describe just the remainder for deallocation when the pthread_t goes away
+		t->freeaddr += freesize_stack;
+		t->freesize -= freesize_stack;
+	} else if (t == &_thread){
+		freeaddr = t->stackaddr - pthread_get_stacksize_np(t);
+		uintptr_t stackborder = trunc_page((uintptr_t)_current_stack_address());
+		freesize_stack = stackborder - freeaddr;
+	} else {
+		freesize_stack = 0;
+	}
 
 	mach_port_t kport = _pthread_kernel_thread(t);
 	semaphore_t joinsem = t->joiner_notify;
 
 	_pthread_dealloc_reply_port(t);
 
-	// Shrink the pthread_t so that it does not include the stack
-	// so that we're always responsible for deallocating the stack.
-	t->freeaddr += freesize;
-	t->freesize = pthreadsize;
-
-	// After the call to __pthread_remove_thread, it is only safe to
-	// dereference the pthread_t structure if EBUSY has been returned.
+	// After the call to __pthread_remove_thread, it is not safe to
+	// dereference the pthread_t structure.
 
 	bool destroy, should_exit;
 	destroy = (__pthread_remove_thread(t, true, &should_exit) != EBUSY);
 
-	if (t == &_thread) {
-		// Don't free the main thread.
-		freesize = 0;
-	} else if (destroy) {
-		// We were told not to keep the pthread_t structure around, so
-		// instead of just deallocating the stack, we should deallocate
-		// the entire structure.
-		freesize += pthreadsize;
+	if (!destroy || t == &_thread) {
+		// Use the adjusted freesize of just the stack that we computed above.
+		freesize = freesize_stack;
 	}
+
+	// Check if there is nothing to free because the thread has a custom
+	// stack allocation and is joinable.
 	if (freesize == 0) {
 		freeaddr = 0;
 	}
@@ -633,21 +686,30 @@ pthread_attr_getguardsize(const pthread_attr_t *attr, size_t *guardsize)
 /*
  * Create and start execution of a new thread.
  */
-
+PTHREAD_NOINLINE
 static void
-_pthread_body(pthread_t self)
+_pthread_body(pthread_t self, bool needs_tsd_base_set)
 {
-	_pthread_set_self(self);
-	__pthread_add_thread(self, false);
-	_pthread_exit(self, (self->fun)(self->arg));
+	_pthread_set_self_internal(self, needs_tsd_base_set);
+	__pthread_add_thread(self, false, false);
+	void *result = (self->fun)(self->arg);
+
+	_pthread_exit(self, result);
 }
 
 void
-_pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void *arg, size_t stacksize, unsigned int pflags)
+_pthread_start(pthread_t self,
+	       mach_port_t kport,
+	       void *(*fun)(void *),
+	       void *arg,
+	       size_t stacksize,
+	       unsigned int pflags)
 {
 	if ((pflags & PTHREAD_START_CUSTOM) == 0) {
 		void *stackaddr = self;
-		_pthread_struct_init(self, &_pthread_attr_default, stackaddr, stacksize, 1);
+		_pthread_struct_init(self, &_pthread_attr_default,
+				stackaddr, stacksize,
+				PTHREAD_ALLOCADDR(stackaddr, stacksize), PTHREAD_ALLOCSIZE(stackaddr, stacksize));
 
 		if (pflags & PTHREAD_START_SETSCHED) {
 			self->policy = ((pflags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK);
@@ -671,35 +733,37 @@ _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void *ar
 		self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
 	}
 
+	bool thread_tsd_bsd_set = (bool)(pflags & PTHREAD_START_TSD_BASE_SET);
+
 	_pthread_set_kernel_thread(self, kport);
 	self->fun = fun;
 	self->arg = arg;
-	
-	_pthread_body(self);
+
+	_pthread_body(self, !thread_tsd_bsd_set);
 }
 
 static void
 _pthread_struct_init(pthread_t t,
 		     const pthread_attr_t *attrs,
-		     void *stack,
+		     void *stackaddr,
 		     size_t stacksize,
-		     int kernalloc)
+		     void *freeaddr,
+		     size_t freesize)
 {
+#if DEBUG
+	PTHREAD_ASSERT(t->sig != _PTHREAD_SIG);
+#endif
+
 	t->sig = _PTHREAD_SIG;
 	t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = t;
 	t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
-	LOCK_INIT(t->lock);
-	t->kernalloc = kernalloc;
-	if (kernalloc != 0) {
-		uintptr_t stackaddr = (uintptr_t)t;
-		t->stacksize = stacksize;
-		t->stackaddr = (void *)stackaddr;
-		t->freeaddr = (void *)(uintptr_t)(stackaddr - stacksize - vm_page_size);
-		t->freesize = pthreadsize + stacksize + vm_page_size;
-	} else {
-		t->stacksize = attrs->stacksize;
-		t->stackaddr = (void *)stack;
-	}
+	_PTHREAD_LOCK_INIT(t->lock);
+
+	t->stackaddr = stackaddr;
+	t->stacksize = stacksize;
+	t->freeaddr = freeaddr;
+	t->freesize = freesize;
+
 	t->guardsize = attrs->guardsize;
 	t->detached = attrs->detached;
 	t->inherit = attrs->inherit;
@@ -752,7 +816,7 @@ pthread_from_mach_thread_np(mach_port_t kernel_thread)
 	struct _pthread *p = NULL;
 
 	/* No need to wait as mach port is already known */
-	LOCK(_pthread_list_lock);
+	_PTHREAD_LOCK(_pthread_list_lock);
 
 	TAILQ_FOREACH(p, &__pthread_head, plist) {
 		if (_pthread_kernel_thread(p) == kernel_thread) {
@@ -760,7 +824,7 @@ pthread_from_mach_thread_np(mach_port_t kernel_thread)
 		}
 	}
 
-	UNLOCK(_pthread_list_lock);
+	_PTHREAD_UNLOCK(_pthread_list_lock);
 
 	return p;
 }
@@ -774,13 +838,44 @@ pthread_get_stacksize_np(pthread_t t)
 	if (t == NULL) {
 		return ESRCH; // XXX bug?
 	}
-	
-	// since the main thread will not get de-allocated from underneath us
+
+#if !defined(__arm__) && !defined(__arm64__)
+	// The default rlimit based allocations will be provided with a stacksize
+	// of the current limit and a freesize of the max.  However, custom
+	// allocations will just have the guard page to free.  If we aren't in the
+	// latter case, call into rlimit to determine the current stack size.  In
+	// the event that the current limit == max limit then we'll fall down the
+	// fast path, but since it's unlikely that the limit is going to be lowered
+	// after it's been changed to the max, we should be fine.
+	//
+	// Of course, on arm rlim_cur == rlim_max and there's only the one guard
+	// page.  So, we can skip all this there.
+	if (t == &_thread && t->stacksize + vm_page_size != t->freesize) {
+		// We want to call getrlimit() just once, as it's relatively expensive
+		static size_t rlimit_stack;
+		
+		if (rlimit_stack == 0) {
+			struct rlimit limit;
+			int ret = getrlimit(RLIMIT_STACK, &limit);
+			
+			if (ret == 0) {
+				rlimit_stack = (size_t) limit.rlim_cur;
+			}
+		}
+		
+		if (rlimit_stack == 0 || rlimit_stack > t->freesize) {
+			return t->stacksize;
+		} else {
+			return rlimit_stack;
+		}
+	}
+#endif /* !defined(__arm__) && !defined(__arm64__) */
+
 	if (t == pthread_self() || t == &_thread) {
 		return t->stacksize;
 	}
 
-	LOCK(_pthread_list_lock);
+	_PTHREAD_LOCK(_pthread_list_lock);
 
 	ret = _pthread_find_thread(t);
 	if (ret == 0) {
@@ -789,7 +884,7 @@ pthread_get_stacksize_np(pthread_t t)
 		size = ret; // XXX bug?
 	}
 
-	UNLOCK(_pthread_list_lock);
+	_PTHREAD_UNLOCK(_pthread_list_lock);
 
 	return size;
 }
@@ -809,7 +904,7 @@ pthread_get_stackaddr_np(pthread_t t)
 		return t->stackaddr;
 	}
 
-	LOCK(_pthread_list_lock);
+	_PTHREAD_LOCK(_pthread_list_lock);
 
 	ret = _pthread_find_thread(t);
 	if (ret == 0) {
@@ -818,7 +913,7 @@ pthread_get_stackaddr_np(pthread_t t)
 		addr = (void *)(uintptr_t)ret; // XXX bug?
 	}
 
-	UNLOCK(_pthread_list_lock);
+	_PTHREAD_UNLOCK(_pthread_list_lock);
 
 	return addr;
 }
@@ -889,12 +984,12 @@ pthread_threadid_np(pthread_t thread, uint64_t *thread_id)
 	if (thread == NULL || thread == self) {
 		*thread_id = self->thread_id;
 	} else {
-		LOCK(_pthread_list_lock);
+		_PTHREAD_LOCK(_pthread_list_lock);
 		res = _pthread_find_thread(thread);
 		if (res == 0) {
 			*thread_id = thread->thread_id;
 		}
-		UNLOCK(_pthread_list_lock);
+		_PTHREAD_UNLOCK(_pthread_list_lock);
 	}
 	return res;
 }
@@ -908,12 +1003,12 @@ pthread_getname_np(pthread_t thread, char *threadname, size_t len)
 		return ESRCH;
 	}
 
-	LOCK(_pthread_list_lock);
+	_PTHREAD_LOCK(_pthread_list_lock);
 	res = _pthread_find_thread(thread);
 	if (res == 0) {
 		strlcpy(threadname, thread->pthread_name, len);
 	}
-	UNLOCK(_pthread_list_lock);
+	_PTHREAD_UNLOCK(_pthread_list_lock);
 	return res;
 }
 
@@ -943,12 +1038,16 @@ pthread_setname_np(const char *name)
 
 PTHREAD_ALWAYS_INLINE
 static inline void
-__pthread_add_thread(pthread_t t, bool parent)
+__pthread_add_thread(pthread_t t, bool parent, bool from_mach_thread)
 {
 	bool should_deallocate = false;
 	bool should_add = true;
 
-	LOCK(_pthread_list_lock);
+	if (from_mach_thread){
+		_PTHREAD_LOCK_FROM_MACH_THREAD(_pthread_list_lock);
+	} else {
+		_PTHREAD_LOCK(_pthread_list_lock);
+	}
 
 	// The parent and child threads race to add the thread to the list.
 	// When called by the parent:
@@ -963,7 +1062,7 @@ __pthread_add_thread(pthread_t t, bool parent)
 			// child got here first, don't add.
 			should_add = false;
 		}
-		
+
 		// If the child exits before we check in then it has to keep
 		// the thread structure memory alive so our dereferences above
 		// are valid. If it's a detached thread, then no joiner will
@@ -989,10 +1088,20 @@ __pthread_add_thread(pthread_t t, bool parent)
 		_pthread_count++;
 	}
 
-	UNLOCK(_pthread_list_lock);
+	if (from_mach_thread){
+		_PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock);
+	} else {
+		_PTHREAD_UNLOCK(_pthread_list_lock);
+	}
 
 	if (parent) {
-		_pthread_introspection_thread_create(t, should_deallocate);
+		if (!from_mach_thread) {
+			// PR-26275485: Mach threads will likely crash trying to run
+			// introspection code.  Since the fall out from the introspection
+			// code not seeing the injected thread is likely less than crashing
+			// in the introspection code, just don't make the call.
+			_pthread_introspection_thread_create(t, should_deallocate);
+		}
 		if (should_deallocate) {
 			_pthread_deallocate(t);
 		}
@@ -1012,7 +1121,7 @@ __pthread_remove_thread(pthread_t t, bool child, bool *should_exit)
 	
 	bool should_remove = true;
 
-	LOCK(_pthread_list_lock);
+	_PTHREAD_LOCK(_pthread_list_lock);
 
 	// When a thread removes itself:
 	//  - Set the childexit flag indicating that the thread has exited.
@@ -1050,17 +1159,18 @@ __pthread_remove_thread(pthread_t t, bool child, bool *should_exit)
 		TAILQ_REMOVE(&__pthread_head, t, plist);
 	}
 
-	UNLOCK(_pthread_list_lock);
+	_PTHREAD_UNLOCK(_pthread_list_lock);
 	
 	return ret;
 }
 
-int
-pthread_create(pthread_t *thread,
+static int
+_pthread_create(pthread_t *thread,
 	const pthread_attr_t *attr,
 	void *(*start_routine)(void *),
-	void *arg)
-{	
+	void *arg,
+	bool from_mach_thread)
+{
 	pthread_t t = NULL;
 	unsigned int flags = 0;
 
@@ -1087,7 +1197,7 @@ pthread_create(pthread_t *thread,
 	__is_threaded = 1;
 
 	void *stack;
-	
+
 	if (attrs->fastpath) {
 		// kernel will allocate thread and stack, pass stacksize.
 		stack = (void *)attrs->stacksize;
@@ -1118,13 +1228,39 @@ pthread_create(pthread_t *thread,
 		t = t2;
 	}
 
-	__pthread_add_thread(t, true);
-	
-	// XXX if a thread is created detached and exits, t will be invalid
+	__pthread_add_thread(t, true, from_mach_thread);
+
+	// n.b. if a thread is created detached and exits, t will be invalid
 	*thread = t;
 	return 0;
 }
 
+int
+pthread_create(pthread_t *thread, // public entry point: thin wrapper over _pthread_create
+	const pthread_attr_t *attr,
+	void *(*start_routine)(void *),
+	void *arg)
+{
+	return _pthread_create(thread, attr, start_routine, arg, false); // from_mach_thread = false
+}
+
+int
+pthread_create_from_mach_thread(pthread_t *thread, // same as pthread_create, but with from_mach_thread set
+	const pthread_attr_t *attr,
+	void *(*start_routine)(void *),
+	void *arg)
+{
+	return _pthread_create(thread, attr, start_routine, arg, true); // true: __pthread_add_thread uses the *_FROM_MACH_THREAD lock macros and skips the introspection create hook
+}
+
+static void
+_pthread_suspended_body(pthread_t self) // entry routine handed to _pthread_setup by pthread_create_suspended_np
+{
+	_pthread_set_self(self); // one-argument form, unlike _pthread_body which passes needs_tsd_base_set explicitly
+	__pthread_add_thread(self, false, false); // parent = false, from_mach_thread = false
+	_pthread_exit(self, (self->fun)(self->arg)); // run the start routine, then exit with its return value
+}
+
 int
 pthread_create_suspended_np(pthread_t *thread,
 	const pthread_attr_t *attr,
@@ -1165,10 +1301,10 @@ pthread_create_suspended_np(pthread_t *thread,
 	t->arg = arg;
 	t->fun = start_routine;
 
-	__pthread_add_thread(t, true);
+	__pthread_add_thread(t, true, false);
 
 	// Set up a suspended thread.
-	_pthread_setup(t, _pthread_body, stack, 1, 0);
+	_pthread_setup(t, _pthread_suspended_body, stack, 1, 0);
 	return res;
 }
 
@@ -1184,7 +1320,7 @@ pthread_detach(pthread_t thread)
 		return res; // Not a valid thread to detach.
 	}
 
-	LOCK(thread->lock);
+	_PTHREAD_LOCK(thread->lock);
 	if (thread->detached & PTHREAD_CREATE_JOINABLE) {
 		if (thread->detached & _PTHREAD_EXITED) {
 			// Join the thread if it's already exited.
@@ -1197,7 +1333,7 @@ pthread_detach(pthread_t thread)
 	} else {
 		res = EINVAL;
 	}
-	UNLOCK(thread->lock);
+	_PTHREAD_UNLOCK(thread->lock);
 
 	if (join) {
 		pthread_join(thread, NULL);
@@ -1238,9 +1374,9 @@ __pthread_workqueue_setkill(int enable)
 {
 	pthread_t self = pthread_self();
 
-	LOCK(self->lock);
+	_PTHREAD_LOCK(self->lock);
 	self->wqkillset = enable ? 1 : 0;
-	UNLOCK(self->lock);
+	_PTHREAD_UNLOCK(self->lock);
 
 	return 0;
 }
@@ -1288,7 +1424,7 @@ _pthread_exit(pthread_t self, void *value_ptr)
 	}
 	_pthread_tsd_cleanup(self);
 
-	LOCK(self->lock);
+	_PTHREAD_LOCK(self->lock);
 	self->detached |= _PTHREAD_EXITED;
 	self->exit_value = value_ptr;
 
@@ -1296,7 +1432,7 @@ _pthread_exit(pthread_t self, void *value_ptr)
 			self->joiner_notify == SEMAPHORE_NULL) {
 		self->joiner_notify = (semaphore_t)os_get_cached_semaphore();
 	}
-	UNLOCK(self->lock);
+	_PTHREAD_UNLOCK(self->lock);
 
 	// Clear per-thread semaphore cache
 	os_put_cached_semaphore(SEMAPHORE_NULL);
@@ -1326,7 +1462,7 @@ pthread_getschedparam(pthread_t thread,
 		return ESRCH;
 	}
 	
-	LOCK(_pthread_list_lock);
+	_PTHREAD_LOCK(_pthread_list_lock);
 
 	ret = _pthread_find_thread(thread);
 	if (ret == 0) {
@@ -1338,7 +1474,7 @@ pthread_getschedparam(pthread_t thread,
 		}
 	}
 
-	UNLOCK(_pthread_list_lock);
+	_PTHREAD_UNLOCK(_pthread_list_lock);
 
 	return ret;
 }
@@ -1398,13 +1534,13 @@ pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param)
 	if (res == 0) {
 		if (bypass == 0) {
 			// Ensure the thread is still valid.
-			LOCK(_pthread_list_lock);
+			_PTHREAD_LOCK(_pthread_list_lock);
 			res = _pthread_find_thread(t);
 			if (res == 0) {
 				t->policy = policy;
 				t->param = *param;
 			}
-			UNLOCK(_pthread_list_lock);
+			_PTHREAD_UNLOCK(_pthread_list_lock);
 		}  else {
 			t->policy = policy;
 			t->param = *param;
@@ -1431,18 +1567,24 @@ pthread_equal(pthread_t t1, pthread_t t2)
 	return (t1 == t2);
 }
 
-// Force LLVM not to optimise this to a call to __pthread_set_self, if it does
-// then _pthread_set_self won't be bound when secondary threads try and start up.
+/* 
+ * Force LLVM not to optimise this to a call to __pthread_set_self, if it does
+ * then _pthread_set_self won't be bound when secondary threads try and start up.
+ */
 PTHREAD_NOINLINE
 void
 _pthread_set_self(pthread_t p)
 {
-	extern void __pthread_set_self(void *);
+	_pthread_set_self_internal(p, true); // needs_tsd_base_set = true: the TSD base is installed as well
+}
 
+void
+_pthread_set_self_internal(pthread_t p, bool needs_tsd_base_set)
+{
 	if (p == NULL) {
 		p = &_thread;
 	}
-	
+
 	uint64_t tid = __thread_selfid();
 	if (tid == -1ull) {
 		PTHREAD_ABORT("failed to set thread_id");
@@ -1451,7 +1593,10 @@ _pthread_set_self(pthread_t p)
 	p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = p;
 	p->tsd[_PTHREAD_TSD_SLOT_ERRNO] = &p->err_no;
 	p->thread_id = tid;
-	__pthread_set_self(&p->tsd[0]);
+
+	if (needs_tsd_base_set) {
+		_thread_set_tsd_base(&p->tsd[0]);
+	}
 }
 
 struct _pthread_once_context {
@@ -1484,9 +1629,9 @@ _pthread_testcancel(pthread_t thread, int isconforming)
 {
 	const int flags = (PTHREAD_CANCEL_ENABLE|_PTHREAD_CANCEL_PENDING);
 
-	LOCK(thread->lock);
+	_PTHREAD_LOCK(thread->lock);
 	bool canceled = ((thread->cancel_state & flags) == flags);
-	UNLOCK(thread->lock);
+	_PTHREAD_UNLOCK(thread->lock);
 	
 	if (canceled) {
 		pthread_exit(isconforming ? PTHREAD_CANCELED : 0);
@@ -1521,12 +1666,65 @@ pthread_setconcurrency(int new_level)
 	return 0;
 }
 
-void
-_pthread_set_pfz(uintptr_t address)
+static unsigned long
+_pthread_strtoul(const char *p, const char **endptr, int base)
 {
+	uintptr_t val = 0;
+	// Minimal strtoul stand-in: only "0x"-prefixed hex is parsed, and only when
+	// base is 16 or 0; any other input yields 0 with *endptr == p.  No overflow check.
+	if ((base == 16 || base == 0) && p && p[0] == '0' && p[1] == 'x') {
+		p += 2;
+		while (1) {
+			char c = *p;
+			if ('0' <= c && c <= '9') {
+				val = (val << 4) + (c - '0');
+			} else if ('a' <= c && c <= 'f') {
+				val = (val << 4) + (c - 'a' + 10);
+			} else if ('A' <= c && c <= 'F') {
+				val = (val << 4) + (c - 'A' + 10);
+			} else {
+				break; // first non-hex character (including the terminating NUL) ends the number
+			}
+			++p;
+		}
+	}
+
+	*endptr = (char *)p; // first character that was not consumed
+	return val;
 }
 
-#if !defined(PTHREAD_TARGET_EOS) && !defined(VARIANT_DYLD)
+// Parse the "main_stack" entry of apple[]: four comma-separated, 0x-prefixed hex
+// fields stored to *stackaddr, *stacksize, *allocaddr and *allocsize in that
+// order.  Returns 1 if all four parsed, else 0 (outputs may then be partially
+// written).  A found entry is always zeroed in place before returning.
+static int
+parse_main_stack_params(const char *apple[],
+			void **stackaddr,
+			size_t *stacksize,
+			void **allocaddr,
+			size_t *allocsize)
+{
+	const char *p = _simple_getenv(apple, "main_stack");
+	if (!p) return 0;
+
+	int ret = 0;
+	const char *s = p;
+	*stackaddr = (void *)(uintptr_t)_pthread_strtoul(s, &s, 16); // explicit integer -> pointer conversion
+	if (*s != ',') goto out;
+	*stacksize = (size_t)_pthread_strtoul(s + 1, &s, 16);
+	if (*s != ',') goto out;
+	*allocaddr = (void *)(uintptr_t)_pthread_strtoul(s + 1, &s, 16);
+	if (*s != ',') goto out;
+	*allocsize = (size_t)_pthread_strtoul(s + 1, &s, 16);
+	if (*s != ',' && *s != 0) goto out; // the last field may be followed by more fields or end the string
+
+	ret = 1;
+out:
+	bzero((char *)p, strlen(p)); // wipe the entry in place; reached on success and on parse failure
+	return ret;
+}
+
+#if !defined(VARIANT_STATIC)
 void *
 malloc(size_t sz)
 {
@@ -1544,7 +1742,7 @@ free(void *p)
 		_pthread_free(p);
 	}
 }
-#endif
+#endif // VARIANT_STATIC
 
 /*
  * Perform package initialization - called automatically when application starts
@@ -1552,8 +1750,10 @@ free(void *p)
 struct ProgramVars; /* forward reference */
 
 int
-__pthread_init(const struct _libpthread_functions *pthread_funcs, const char *envp[] __unused,
-               const char *apple[] __unused, const struct ProgramVars *vars __unused)
+__pthread_init(const struct _libpthread_functions *pthread_funcs,
+	       const char *envp[] __unused,
+	       const char *apple[],
+	       const struct ProgramVars *vars __unused)
 {
 	// Save our provided pushed-down functions
 	if (pthread_funcs) {
@@ -1588,17 +1788,33 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs, const char *en
 	// Set up the main thread structure
 	//
 
-	void *stackaddr;
-	size_t stacksize = DFLSSIZ;
-    	size_t len = sizeof(stackaddr);
-    	int mib[] = { CTL_KERN, KERN_USRSTACK };
-    	if (__sysctl(mib, 2, &stackaddr, &len, NULL, 0) != 0) {
-       		stackaddr = (void *)USRSTACK;
+	// Get the address and size of the main thread's stack from the kernel.
+	void *stackaddr = 0;
+	size_t stacksize = 0;
+	void *allocaddr = 0;
+	size_t allocsize = 0;
+	if (!parse_main_stack_params(apple, &stackaddr, &stacksize, &allocaddr, &allocsize) ||
+		stackaddr == NULL || stacksize == 0) {
+		// Fall back to previous behavior.
+		size_t len = sizeof(stackaddr);
+		int mib[] = { CTL_KERN, KERN_USRSTACK };
+		if (__sysctl(mib, 2, &stackaddr, &len, NULL, 0) != 0) {
+#if defined(__LP64__)
+			stackaddr = (void *)USRSTACK64;
+#else
+			stackaddr = (void *)USRSTACK;
+#endif
+		}
+		stacksize = DFLSSIZ;
+		allocaddr = 0;
+		allocsize = 0;
 	}
 
 	pthread_t thread = &_thread;
 	pthread_attr_init(&_pthread_attr_default);
-	_pthread_struct_init(thread, &_pthread_attr_default, stackaddr, stacksize, 0);
+	_pthread_struct_init(thread, &_pthread_attr_default,
+			     stackaddr, stacksize,
+			     allocaddr, allocsize);
 	thread->detached = PTHREAD_CREATE_JOINABLE;
 
 	// Finish initialization with common code that is reinvoked on the
@@ -1612,6 +1828,9 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs, const char *en
 	// Set up kernel entry points with __bsdthread_register.
 	pthread_workqueue_atfork_child();
 
+	// Have pthread_key do its init envvar checks.
+	_pthread_key_global_init(envp);
+
 	return 0;
 }
 
@@ -1626,7 +1845,7 @@ PTHREAD_NOEXPORT void
 __pthread_fork_child_internal(pthread_t p)
 {
 	TAILQ_INIT(&__pthread_head);
-	LOCK_INIT(_pthread_list_lock);
+	_PTHREAD_LOCK_INIT(_pthread_list_lock);
 
 	// Re-use the main thread's static storage if no thread was provided.
 	if (p == NULL) {
@@ -1636,7 +1855,7 @@ __pthread_fork_child_internal(pthread_t p)
 		p = &_thread;
 	}
 
-	LOCK_INIT(p->lock);
+	_PTHREAD_LOCK_INIT(p->lock);
 	_pthread_set_kernel_thread(p, mach_thread_self());
 	_pthread_set_reply_port(p, mach_reply_port());
 	p->__cleanup_stack = NULL;
@@ -1677,13 +1896,13 @@ _pthread_setcancelstate_internal(int state, int *oldstate, int conforming)
 	}
 
 	self = pthread_self();
-	LOCK(self->lock);
+	_PTHREAD_LOCK(self->lock);
 	if (oldstate) {
 		*oldstate = self->cancel_state & _PTHREAD_CANCEL_STATE_MASK;
 	}
 	self->cancel_state &= ~_PTHREAD_CANCEL_STATE_MASK;
 	self->cancel_state |= state;
-	UNLOCK(self->lock);
+	_PTHREAD_UNLOCK(self->lock);
 	if (!conforming) {
 		_pthread_testcancel(self, 0);  /* See if we need to 'die' now... */
 	}
@@ -1694,7 +1913,7 @@ _pthread_setcancelstate_internal(int state, int *oldstate, int conforming)
 static void
 _pthread_setcancelstate_exit(pthread_t self, void * value_ptr, int conforming)
 {
-	LOCK(self->lock);
+	_PTHREAD_LOCK(self->lock);
 	self->cancel_state &= ~(_PTHREAD_CANCEL_STATE_MASK | _PTHREAD_CANCEL_TYPE_MASK);
 	self->cancel_state |= (PTHREAD_CANCEL_DISABLE | PTHREAD_CANCEL_DEFERRED);
 	if (value_ptr == PTHREAD_CANCELED) {
@@ -1702,7 +1921,7 @@ _pthread_setcancelstate_exit(pthread_t self, void * value_ptr, int conforming)
 		self->detached |= _PTHREAD_WASCANCEL;
 // 4597450: end
 	}
-	UNLOCK(self->lock);
+	_PTHREAD_UNLOCK(self->lock);
 }
 
 int
@@ -1732,9 +1951,9 @@ loop:
 		TAILQ_FOREACH(p, &__pthread_head, plist) {
 			if (p == thread) {
 				if (_pthread_kernel_thread(thread) == MACH_PORT_NULL) {
-					UNLOCK(_pthread_list_lock);
+					_PTHREAD_UNLOCK(_pthread_list_lock);
 					sched_yield();
-					LOCK(_pthread_list_lock);
+					_PTHREAD_LOCK(_pthread_list_lock);
 					goto loop;
 				} 
 				return 0;
@@ -1754,7 +1973,7 @@ _pthread_lookup_thread(pthread_t thread, mach_port_t *portp, int only_joinable)
 		return ESRCH;
 	}
 	
-	LOCK(_pthread_list_lock);
+	_PTHREAD_LOCK(_pthread_list_lock);
 	
 	ret = _pthread_find_thread(thread);
 	if (ret == 0) {
@@ -1767,7 +1986,7 @@ _pthread_lookup_thread(pthread_t thread, mach_port_t *portp, int only_joinable)
 		}
 	}
 	
-	UNLOCK(_pthread_list_lock);
+	_PTHREAD_UNLOCK(_pthread_list_lock);
 	
 	if (portp != NULL) {
 		*portp = kport;
@@ -1785,22 +2004,17 @@ _pthread_clear_qos_tsd(mach_port_t thread_port)
 	} else {
 		pthread_t p;
 
-		LOCK(_pthread_list_lock);
+		_PTHREAD_LOCK(_pthread_list_lock);
 
 		TAILQ_FOREACH(p, &__pthread_head, plist) {
-			mach_port_t kp;
-			while ((kp = _pthread_kernel_thread(p)) == MACH_PORT_NULL) {
-				UNLOCK(_pthread_list_lock);
-				sched_yield();
-				LOCK(_pthread_list_lock);
-			}
+			mach_port_t kp = _pthread_kernel_thread(p);
 			if (thread_port == kp) {
 				p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
 				break;
 			}
 		}
 
-		UNLOCK(_pthread_list_lock);
+		_PTHREAD_UNLOCK(_pthread_list_lock);
 	}
 }
 
@@ -1809,24 +2023,25 @@ _pthread_clear_qos_tsd(mach_port_t thread_port)
 PTHREAD_NOEXPORT void
 pthread_workqueue_atfork_child(void)
 {
-	struct _pthread_registration_data data = {
-		.dispatch_queue_offset = __PTK_LIBDISPATCH_KEY0 * sizeof(void *),
-	};
+	struct _pthread_registration_data data = {};
+	data.version = sizeof(struct _pthread_registration_data);
+	data.dispatch_queue_offset = __PTK_LIBDISPATCH_KEY0 * sizeof(void *);
+	data.tsd_offset = offsetof(struct _pthread, tsd);
 
 	int rv = __bsdthread_register(thread_start,
-			     start_wqthread,
-			     (int)pthreadsize,
-			     (void*)&data,
-			     (uintptr_t)sizeof(data),
-			     data.dispatch_queue_offset);
+			start_wqthread, (int)PTHREAD_SIZE,
+			(void*)&data, (uintptr_t)sizeof(data),
+			data.dispatch_queue_offset);
 
 	if (rv > 0) {
 		__pthread_supported_features = rv;
 	}
 
-	if (_pthread_priority_get_qos_newest(data.main_qos) != QOS_CLASS_UNSPECIFIED) {
-		_pthread_set_main_qos(data.main_qos);
-		_thread.tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = data.main_qos;
+	pthread_priority_t main_qos = (pthread_priority_t)data.main_qos;
+
+	if (_pthread_priority_get_qos_newest(main_qos) != QOS_CLASS_UNSPECIFIED) {
+		_pthread_set_main_qos(main_qos);
+		_thread.tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = main_qos;
 	}
 
 	if (__libdispatch_workerfunction != NULL) {
@@ -1835,26 +2050,43 @@ pthread_workqueue_atfork_child(void)
 	}
 }
 
+// workqueue entry point from kernel
 void
-_pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unused, int flags)
+_pthread_wqthread(pthread_t self, mach_port_t kport, void *stacklowaddr, void *keventlist, int flags, int nkevents)
 {
 	PTHREAD_ASSERT(flags & WQ_FLAG_THREAD_NEWSPI);
 
 	int thread_reuse = flags & WQ_FLAG_THREAD_REUSE;
 	int thread_class = flags & WQ_FLAG_THREAD_PRIOMASK;
 	int overcommit = (flags & WQ_FLAG_THREAD_OVERCOMMIT) != 0;
+	int kevent = flags & WQ_FLAG_THREAD_KEVENT;
+	PTHREAD_ASSERT((!kevent) || (__libdispatch_keventfunction != NULL));
 
-	pthread_priority_t priority;
+	pthread_priority_t priority = 0;
+	unsigned long priority_flags = 0;
+
+	if (overcommit)
+		priority_flags |= _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
+	if (flags & WQ_FLAG_THREAD_EVENT_MANAGER)
+		priority_flags |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
+	if (kevent)
+		priority_flags |= _PTHREAD_PRIORITY_NEEDS_UNBIND_FLAG;
 
 	if ((__pthread_supported_features & PTHREAD_FEATURE_QOS_MAINTENANCE) == 0) {
-		priority = _pthread_priority_make_version2(thread_class, 0, (overcommit ? _PTHREAD_PRIORITY_OVERCOMMIT_FLAG : 0));
+		priority = _pthread_priority_make_version2(thread_class, 0, priority_flags);
 	} else {
-		priority = _pthread_priority_make_newest(thread_class, 0, (overcommit ? _PTHREAD_PRIORITY_OVERCOMMIT_FLAG : 0));
+		priority = _pthread_priority_make_newest(thread_class, 0, priority_flags);
 	}
 
 	if (thread_reuse == 0) {
 		// New thread created by kernel, needs initialization.
-		_pthread_struct_init(self, &_pthread_attr_default, stackaddr, DEFAULT_STACK_SIZE, 1);
+		void *stackaddr = self;
+		size_t stacksize = (uintptr_t)self - (uintptr_t)stacklowaddr;
+
+		_pthread_struct_init(self, &_pthread_attr_default,
+							 stackaddr, stacksize,
+							 PTHREAD_ALLOCADDR(stackaddr, stacksize), PTHREAD_ALLOCSIZE(stackaddr, stacksize));
+
 		_pthread_set_kernel_thread(self, kport);
 		self->wqthread = 1;
 		self->wqkillset = 0;
@@ -1864,16 +2096,16 @@ _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unus
 		self->detached |= PTHREAD_CREATE_DETACHED;
 
 		// Update the running thread count and set childrun bit.
-		// XXX this should be consolidated with pthread_body().
-		_pthread_set_self(self);
+		bool thread_tsd_base_set = (bool)(flags & WQ_FLAG_THREAD_TSD_BASE_SET);
+		_pthread_set_self_internal(self, !thread_tsd_base_set);
 		_pthread_introspection_thread_create(self, false);
-		__pthread_add_thread(self, false);
+		__pthread_add_thread(self, false, false);
+	}
 
-		// If we're running with fine-grained priority, we also need to
-		// set this thread to have the QoS class provided to use by the kernel
-		if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) {
-			_pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(thread_class, 0, 0));
-		}
+	// If we're running with fine-grained priority, we also need to
+	// set this thread to have the QoS class provided to us by the kernel
+	if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) {
+		_pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, _pthread_priority_make_newest(thread_class, 0, priority_flags));
 	}
 
 #if WQ_DEBUG
@@ -1881,9 +2113,32 @@ _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unus
 	PTHREAD_ASSERT(self == pthread_self());
 #endif // WQ_DEBUG
 
-	self->fun = (void *(*)(void *))__libdispatch_workerfunction;
+	if (kevent){
+		self->fun = (void *(*)(void*))__libdispatch_keventfunction;
+	} else {
+		self->fun = (void *(*)(void *))__libdispatch_workerfunction;
+	}
 	self->arg = (void *)(uintptr_t)thread_class;
 
+	if (kevent && keventlist && nkevents > 0){
+	kevent_errors_retry:
+		(*__libdispatch_keventfunction)(&keventlist, &nkevents);
+
+		int errors_out = __workq_kernreturn(WQOPS_THREAD_KEVENT_RETURN, keventlist, nkevents, 0);
+		if (errors_out > 0){
+			nkevents = errors_out;
+			goto kevent_errors_retry;
+		} else if (errors_out < 0){
+			PTHREAD_ABORT("kevent return produced an error: %d", errno);
+		}
+		goto thexit;
+    } else if (kevent){
+		(*__libdispatch_keventfunction)(NULL, NULL);
+
+		__workq_kernreturn(WQOPS_THREAD_KEVENT_RETURN, NULL, 0, 0);
+		goto thexit;
+    }
+
 	if (__pthread_supported_features & PTHREAD_FEATURE_FINEPRIO) {
 		if (!__workq_newapi) {
 			/* Old thread priorities are inverted from where we have them in
@@ -1946,25 +2201,19 @@ _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unus
 	}
 
 	__workq_kernreturn(WQOPS_THREAD_RETURN, NULL, 0, 0);
-	_pthread_exit(self, NULL);
-}
 
-/***** pthread workqueue API for libdispatch *****/
-
-int
-_pthread_workqueue_init(pthread_workqueue_function2_t func, int offset, int flags)
-{
-	if (flags != 0) {
-		return ENOTSUP;
+thexit:
+	{
+		// Reset QoS to something low for the cleanup process
+		pthread_priority_t priority = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0);
+		_pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, priority);
 	}
 
-	__workq_newapi = true;
-	__libdispatch_offset = offset;
-
-	int rv = pthread_workqueue_setdispatch_np((pthread_workqueue_function_t)func);
-	return rv;
+	_pthread_exit(self, NULL);
 }
 
+/***** pthread workqueue API for libdispatch *****/
+
 void
 pthread_workqueue_setdispatchoffset_np(int offset)
 {
@@ -1972,16 +2221,17 @@ pthread_workqueue_setdispatchoffset_np(int offset)
 }
 
 int
-pthread_workqueue_setdispatch_np(pthread_workqueue_function_t worker_func)
+pthread_workqueue_setdispatch_with_kevent_np(pthread_workqueue_function2_t queue_func, pthread_workqueue_function_kevent_t kevent_func)
 {
 	int res = EBUSY;
 	if (__libdispatch_workerfunction == NULL) {
 		// Check whether the kernel supports new SPIs
-		res = __workq_kernreturn(WQOPS_QUEUE_NEWSPISUPP, NULL, __libdispatch_offset, 0);
+		res = __workq_kernreturn(WQOPS_QUEUE_NEWSPISUPP, NULL, __libdispatch_offset, kevent_func != NULL ? 0x01 : 0x00);
 		if (res == -1){
 			res = ENOTSUP;
 		} else {
-			__libdispatch_workerfunction = (pthread_workqueue_function2_t)worker_func;
+			__libdispatch_workerfunction = queue_func;
+			__libdispatch_keventfunction = kevent_func;
 
 			// Prepare the kernel for workq action
 			(void)__workq_open();
@@ -1993,6 +2243,32 @@ pthread_workqueue_setdispatch_np(pthread_workqueue_function_t worker_func)
 	return res;
 }
 
+int
+_pthread_workqueue_init_with_kevent(pthread_workqueue_function2_t queue_func, pthread_workqueue_function_kevent_t kevent_func, int offset, int flags)
+{ // Workqueue init entry point: validates flags, records new-API state, then registers both callbacks.
+	if (flags != 0) { // any non-zero flags value is rejected
+		return ENOTSUP;
+	}
+	// Note: the two globals below are written before registration is attempted, whatever it returns.
+	__workq_newapi = true;
+	__libdispatch_offset = offset;
+	// Hand both callbacks to the registration routine; its result is returned unchanged.
+	int rv = pthread_workqueue_setdispatch_with_kevent_np(queue_func, kevent_func);
+	return rv;
+}
+
+int
+_pthread_workqueue_init(pthread_workqueue_function2_t func, int offset, int flags)
+{ // Variant without a kevent callback: forwards to the _with_kevent form with kevent_func == NULL.
+	return _pthread_workqueue_init_with_kevent(func, NULL, offset, flags);
+}
+
+int
+pthread_workqueue_setdispatch_np(pthread_workqueue_function_t worker_func)
+{ // Registers worker_func only: it is cast to the function2 type and no kevent callback is supplied.
+	return pthread_workqueue_setdispatch_with_kevent_np((pthread_workqueue_function2_t)worker_func, NULL);
+}
+
 int
 _pthread_workqueue_supported(void)
 {
@@ -2070,6 +2346,16 @@ _pthread_workqueue_addthreads(int numthreads, pthread_priority_t priority)
 	return res;
 }
 
+int
+_pthread_workqueue_set_event_manager_priority(pthread_priority_t priority)
+{ // Issues WQOPS_SET_EVENT_MANAGER_PRIORITY through __workq_kernreturn; returns its result, or errno on -1.
+	int res = __workq_kernreturn(WQOPS_SET_EVENT_MANAGER_PRIORITY, NULL, (int)priority, 0); // priority is narrowed to int for this argument
+	if (res == -1) {
+		res = errno; // translate the -1 failure marker into the errno value
+	}
+	return res;
+}
+
 /*
  * Introspection SPI for libpthread.
  */
@@ -2092,7 +2378,7 @@ static void
 _pthread_introspection_hook_callout_thread_create(pthread_t t, bool destroy)
 {
 	_pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_CREATE, t, t,
-			pthreadsize);
+			PTHREAD_SIZE);
 	if (!destroy) return;
 	_pthread_introspection_thread_destroy(t);
 }
@@ -2114,7 +2400,7 @@ _pthread_introspection_hook_callout_thread_start(pthread_t t)
 		freesize = t->stacksize + t->guardsize;
 		freeaddr = t->stackaddr - freesize;
 	} else {
-		freesize = t->freesize - pthreadsize;
+		freesize = t->freesize - PTHREAD_SIZE;
 		freeaddr = t->freeaddr;
 	}
 	_pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_START, t,
@@ -2134,7 +2420,7 @@ _pthread_introspection_hook_callout_thread_terminate(pthread_t t,
 		void *freeaddr, size_t freesize, bool destroy)
 {
 	if (destroy && freesize) {
-		freesize -= pthreadsize;
+		freesize -= PTHREAD_SIZE;
 	}
 	_pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_TERMINATE, t,
 			freeaddr, freesize);
@@ -2157,7 +2443,7 @@ _pthread_introspection_hook_callout_thread_destroy(pthread_t t)
 {
 	if (t == &_thread) return;
 	_pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_DESTROY, t, t,
-			pthreadsize);
+			PTHREAD_SIZE);
 }
 
 static inline void