]> git.saurik.com Git - apple/libpthread.git/blobdiff - src/pthread.c
libpthread-454.100.8.tar.gz
[apple/libpthread.git] / src / pthread.c
index 3c3ea6a02645a2a1d6ad3e3ab27d0fbc640f69fa..6882fa2e6f33bbeccac35d46b4c7e6851e36d4b7 100644 (file)
  * POSIX Pthread Library
  */
 
-#include "resolver.h"
 #include "internal.h"
-#include "private.h"
-#include "workqueue_private.h"
-#include "introspection_private.h"
-#include "qos_private.h"
-#include "tsd_private.h"
-#include "pthread/stack_np.h"
-#include "offsets.h" // included to validate the offsets at build time
 
 #include <stdlib.h>
 #include <errno.h>
 #include <machine/vmparam.h>
 #define        __APPLE_API_PRIVATE
 #include <machine/cpu_capabilities.h>
-
-#include <_simple.h>
-#include <platform/string.h>
-#include <platform/compat.h>
-
-extern int __sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
-               void *newp, size_t newlen);
-extern void __exit(int) __attribute__((noreturn));
-extern int __pthread_kill(mach_port_t, int);
-
-extern void _pthread_joiner_wake(pthread_t thread);
-
-#if !VARIANT_DYLD
-PTHREAD_NOEXPORT extern struct _pthread *_main_thread_ptr;
-#define main_thread() (_main_thread_ptr)
-#endif // VARIANT_DYLD
+#if __has_include(<ptrauth.h>)
+#include <ptrauth.h>
+#endif // __has_include(<ptrauth.h>)
+#include <os/thread_self_restrict.h>
+#include <os/tsd.h>
 
 // Default stack size is 512KB; independent of the main thread's stack size.
 #define DEFAULT_STACK_SIZE (size_t)(512 * 1024)
 
-
 //
 // Global constants
 //
@@ -106,7 +86,7 @@ PTHREAD_NOEXPORT extern struct _pthread *_main_thread_ptr;
  * start of the next page.  There's also one page worth of allocation
  * below stacksize for the guard page. <rdar://problem/19941744>
  */
-#define PTHREAD_SIZE ((size_t)mach_vm_round_page(sizeof(struct _pthread)))
+#define PTHREAD_SIZE ((size_t)mach_vm_round_page(sizeof(struct pthread_s)))
 #define PTHREAD_ALLOCADDR(stackaddr, stacksize) ((stackaddr - stacksize) - vm_page_size)
 #define PTHREAD_ALLOCSIZE(stackaddr, stacksize) ((round_page((uintptr_t)stackaddr) + PTHREAD_SIZE) - (uintptr_t)PTHREAD_ALLOCADDR(stackaddr, stacksize))
 
@@ -126,9 +106,9 @@ static const pthread_attr_t _pthread_attr_default = {
 
 const struct pthread_layout_offsets_s pthread_layout_offsets = {
        .plo_version = 1,
-       .plo_pthread_tsd_base_offset = offsetof(struct _pthread, tsd),
+       .plo_pthread_tsd_base_offset = offsetof(struct pthread_s, tsd),
        .plo_pthread_tsd_base_address_offset = 0,
-       .plo_pthread_tsd_entry_size = sizeof(((struct _pthread *)NULL)->tsd[0]),
+       .plo_pthread_tsd_entry_size = sizeof(((struct pthread_s *)NULL)->tsd[0]),
 };
 
 #endif // PTHREAD_LAYOUT_SPI
@@ -140,7 +120,7 @@ const struct pthread_layout_offsets_s pthread_layout_offsets = {
 // This global should be used (carefully) by anyone needing to know if a
 // pthread (other than the main thread) has been created.
 int __is_threaded = 0;
-int __unix_conforming = 0;
+const int __unix_conforming = 1; // we're always conformant, but it's exported
 
 //
 // Global internal variables
@@ -155,10 +135,11 @@ uint32_t _main_qos;
 
 #if VARIANT_DYLD
 // The main thread's pthread_t
-struct _pthread _main_thread __attribute__((aligned(64))) = { };
-#define main_thread() (&_main_thread)
+struct pthread_s _main_thread OS_ALIGNED(64);
 #else // VARIANT_DYLD
-struct _pthread *_main_thread_ptr;
+pthread_t _main_thread_ptr;
+void *(*_pthread_malloc)(size_t);
+void (*_pthread_free)(void *);
 #endif // VARIANT_DYLD
 
 #if PTHREAD_DEBUG_LOG
@@ -178,23 +159,19 @@ static uint8_t min_priority;
 #endif // !VARIANT_DYLD
 static int _pthread_count = 1;
 static int pthread_concurrency;
-static uintptr_t _pthread_ptr_munge_token;
+uintptr_t _pthread_ptr_munge_token;
 
 static void (*exitf)(int) = __exit;
-#if !VARIANT_DYLD
-static void *(*_pthread_malloc)(size_t) = NULL;
-static void (*_pthread_free)(void *) = NULL;
-#endif // !VARIANT_DYLD
 
 // work queue support data
-PTHREAD_NORETURN
+OS_NORETURN OS_COLD
 static void
 __pthread_invalid_keventfunction(void **events, int *nevents)
 {
        PTHREAD_CLIENT_CRASH(0, "Invalid kqworkq setup");
 }
 
-PTHREAD_NORETURN
+OS_NORETURN OS_COLD
 static void
 __pthread_invalid_workloopfunction(uint64_t *workloop_id, void **events, int *nevents)
 {
@@ -203,11 +180,12 @@ __pthread_invalid_workloopfunction(uint64_t *workloop_id, void **events, int *ne
 static pthread_workqueue_function2_t __libdispatch_workerfunction;
 static pthread_workqueue_function_kevent_t __libdispatch_keventfunction = &__pthread_invalid_keventfunction;
 static pthread_workqueue_function_workloop_t __libdispatch_workloopfunction = &__pthread_invalid_workloopfunction;
-static int __libdispatch_offset;
 static int __pthread_supported_features; // supported feature set
 
 #if defined(__i386__) || defined(__x86_64__)
 static mach_vm_address_t __pthread_stack_hint = 0xB0000000;
+#elif defined(__arm__) || defined(__arm64__)
+static mach_vm_address_t __pthread_stack_hint = 0x30000000;
 #else
 #error no __pthread_stack_hint for this architecture
 #endif
@@ -223,10 +201,7 @@ static inline void _pthread_struct_init(pthread_t t, const pthread_attr_t *attrs
 #if VARIANT_DYLD
 static void _pthread_set_self_dyld(void);
 #endif // VARIANT_DYLD
-static inline void _pthread_set_self_internal(pthread_t, bool needs_tsd_base_set);
-
-static void _pthread_dealloc_reply_port(pthread_t t);
-static void _pthread_dealloc_special_reply_port(pthread_t t);
+static inline void _pthread_set_self_internal(pthread_t);
 
 static inline void __pthread_started_thread(pthread_t t);
 
@@ -237,10 +212,6 @@ static inline void _pthread_introspection_thread_start(pthread_t t);
 static inline void _pthread_introspection_thread_terminate(pthread_t t);
 static inline void _pthread_introspection_thread_destroy(pthread_t t);
 
-extern void _pthread_set_self(pthread_t);
-extern void start_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *unused, int reuse); // trampoline into _pthread_wqthread
-extern void thread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags); // trampoline into _pthread_start
-
 /*
  * Flags filed passed to bsdthread_create and back in pthread_start
  * 31  <---------------------------------> 0
@@ -259,30 +230,8 @@ extern void thread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *)
 #define PTHREAD_START_POLICY_MASK 0xff
 #define PTHREAD_START_IMPORTANCE_MASK 0xffff
 
-#if (!defined(__OPEN_SOURCE__) && TARGET_OS_OSX) || OS_VARIANT_RESOLVED // 40703288
-static int pthread_setschedparam_internal(pthread_t, mach_port_t, int,
-               const struct sched_param *);
-#endif
-
-extern pthread_t __bsdthread_create(void *(*func)(void *), void * func_arg, void * stack, pthread_t  thread, unsigned int flags);
-extern int __bsdthread_register(void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), void (*)(pthread_t, mach_port_t, void *, void *, int), int,void (*)(pthread_t, mach_port_t, void *(*)(void *), void *, size_t, unsigned int), int32_t *,__uint64_t);
-extern int __bsdthread_terminate(void * freeaddr, size_t freesize, mach_port_t kport, mach_port_t joinsem);
-extern __uint64_t __thread_selfid( void );
-
-#if __LP64__
-_Static_assert(offsetof(struct _pthread, tsd) == 224, "TSD LP64 offset");
-#else
-_Static_assert(offsetof(struct _pthread, tsd) == 176, "TSD ILP32 offset");
-#endif
-_Static_assert(offsetof(struct _pthread, tsd) + _PTHREAD_STRUCT_DIRECT_THREADID_OFFSET
-               == offsetof(struct _pthread, thread_id),
-               "_PTHREAD_STRUCT_DIRECT_THREADID_OFFSET is correct");
-
 #pragma mark pthread attrs
 
-_Static_assert(sizeof(struct _pthread_attr_t) == sizeof(__darwin_pthread_attr_t),
-               "internal pthread_attr_t == external pthread_attr_t");
-
 int
 pthread_attr_destroy(pthread_attr_t *attr)
 {
@@ -316,7 +265,7 @@ pthread_attr_getinheritsched(const pthread_attr_t *attr, int *inheritsched)
        return ret;
 }
 
-static PTHREAD_ALWAYS_INLINE void
+static OS_ALWAYS_INLINE void
 _pthread_attr_get_schedparam(const pthread_attr_t *attr,
                struct sched_param *param)
 {
@@ -396,6 +345,8 @@ pthread_attr_setschedparam(pthread_attr_t *attr, const struct sched_param *param
        return ret;
 }
 
+#define _PTHREAD_POLICY_IS_FIXEDPRI(x) ((x) == SCHED_RR || (x) == SCHED_FIFO)
+
 int
 pthread_attr_setschedpolicy(pthread_attr_t *attr, int policy)
 {
@@ -455,7 +406,7 @@ pthread_attr_setstackaddr(pthread_attr_t *attr, void *stackaddr)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG &&
-                       ((uintptr_t)stackaddr % vm_page_size) == 0) {
+                       ((mach_vm_address_t)stackaddr & vm_page_mask) == 0) {
                attr->stackaddr = stackaddr;
                attr->defaultguardpage = false;
                attr->guardsize = 0;
@@ -484,9 +435,16 @@ pthread_attr_getstacksize(const pthread_attr_t *attr, size_t *stacksize)
 int
 pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize)
 {
+#if TARGET_OS_OSX
+       // If the caller is doing something reasonable, help them out.
+       if (stacksize % 0x1000 == 0) {
+               stacksize = round_page(stacksize);
+       }
+#endif // TARGET_OS_OSX
+
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG &&
-                       (stacksize % vm_page_size) == 0 &&
+                       ((stacksize & vm_page_mask) == 0) &&
                        stacksize >= PTHREAD_STACK_MIN) {
                attr->stacksize = stacksize;
                ret = 0;
@@ -513,8 +471,8 @@ pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize)
 {
        int ret = EINVAL;
        if (attr->sig == _PTHREAD_ATTR_SIG &&
-                       ((uintptr_t)stackaddr % vm_page_size) == 0 &&
-                       (stacksize % vm_page_size) == 0 &&
+                       (((mach_vm_address_t)stackaddr & vm_page_mask) == 0) &&
+                       ((stacksize & vm_page_mask) == 0) &&
                        stacksize >= PTHREAD_STACK_MIN) {
                attr->stackaddr = (void *)((uintptr_t)stackaddr + stacksize);
                attr->stacksize = stacksize;
@@ -526,8 +484,16 @@ pthread_attr_setstack(pthread_attr_t *attr, void *stackaddr, size_t stacksize)
 int
 pthread_attr_setguardsize(pthread_attr_t *attr, size_t guardsize)
 {
+#if TARGET_OS_OSX
+       // If the caller is doing something reasonable, help them out.
+       if (guardsize % 0x1000 == 0) {
+               guardsize = round_page(guardsize);
+       }
+#endif // TARGET_OS_OSX
+
        int ret = EINVAL;
-       if (attr->sig == _PTHREAD_ATTR_SIG && (guardsize % vm_page_size) == 0) {
+       if (attr->sig == _PTHREAD_ATTR_SIG &&
+                       (guardsize & vm_page_mask) == 0) {
                /* Guardsize of 0 is valid, means no guard */
                attr->defaultguardpage = false;
                attr->guardsize = guardsize;
@@ -584,20 +550,26 @@ pthread_attr_setcpupercent_np(pthread_attr_t *attr, int percent,
 // it should be freed.
 
 static pthread_t
-_pthread_allocate(const pthread_attr_t *attrs, void **stack)
+_pthread_allocate(const pthread_attr_t *attrs, void **stack,
+               bool from_mach_thread)
 {
        mach_vm_address_t allocaddr = __pthread_stack_hint;
        size_t allocsize, guardsize, stacksize, pthreadoff;
        kern_return_t kr;
        pthread_t t;
 
-       PTHREAD_ASSERT(attrs->stacksize == 0 ||
-                       attrs->stacksize >= PTHREAD_STACK_MIN);
+       if (os_unlikely(attrs->stacksize != 0 &&
+                       attrs->stacksize < PTHREAD_STACK_MIN)) {
+               PTHREAD_CLIENT_CRASH(attrs->stacksize, "Stack size in attrs is too small");
+       }
+
+       if (os_unlikely((mach_vm_address_t)attrs->stackaddr & vm_page_mask)) {
+               PTHREAD_CLIENT_CRASH(attrs->stackaddr, "Unaligned stack addr in attrs");
+       }
 
        // Allocate a pthread structure if necessary
 
        if (attrs->stackaddr != NULL) {
-               PTHREAD_ASSERT(((uintptr_t)attrs->stackaddr % vm_page_size) == 0);
                allocsize = PTHREAD_SIZE;
                guardsize = 0;
                pthreadoff = 0;
@@ -621,10 +593,32 @@ _pthread_allocate(const pthread_attr_t *attrs, void **stack)
        if (kr != KERN_SUCCESS) {
                kr = mach_vm_allocate(mach_task_self(), &allocaddr, allocsize,
                                 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
+       } else if (__syscall_logger && !from_mach_thread) {
+               // libsyscall will not output malloc stack logging events when
+               // VM_MEMORY_STACK is passed in to facilitate mach thread promotion.
+               // To avoid losing the stack traces for normal p-thread create
+               // operations, libpthread must pretend to be the vm syscall and log
+               // the allocations. <rdar://36418708>
+               int eventTypeFlags = stack_logging_type_vm_allocate |
+                               stack_logging_type_mapped_file_or_shared_mem;
+               __syscall_logger(eventTypeFlags | VM_MAKE_TAG(VM_MEMORY_STACK),
+                               (uintptr_t)mach_task_self(), (uintptr_t)allocsize, 0,
+                               (uintptr_t)allocaddr, 0);
        }
+
        if (kr != KERN_SUCCESS) {
                *stack  = NULL;
                return NULL;
+       } else if (__syscall_logger && !from_mach_thread) {
+               // libsyscall will not output malloc stack logging events when
+               // VM_MEMORY_STACK is passed in to facilitate mach thread promotion.
+               // To avoid losing the stack traces for normal p-thread create
+               // operations, libpthread must pretend to be the vm syscall and log
+               // the allocations. <rdar://36418708>
+               int eventTypeFlags = stack_logging_type_vm_allocate;
+               __syscall_logger(eventTypeFlags | VM_MAKE_TAG(VM_MEMORY_STACK),
+                               (uintptr_t)mach_task_self(), (uintptr_t)allocsize, 0,
+                               (uintptr_t)allocaddr, 0);
        }
 
        // The stack grows down.
@@ -650,7 +644,7 @@ _pthread_allocate(const pthread_attr_t *attrs, void **stack)
        return t;
 }
 
-PTHREAD_NOINLINE
+OS_NOINLINE
 void
 _pthread_deallocate(pthread_t t, bool from_mach_thread)
 {
@@ -662,14 +656,16 @@ _pthread_deallocate(pthread_t t, bool from_mach_thread)
                        _pthread_introspection_thread_destroy(t);
                }
                ret = mach_vm_deallocate(mach_task_self(), t->freeaddr, t->freesize);
-               PTHREAD_ASSERT(ret == KERN_SUCCESS);
+               if (ret != KERN_SUCCESS) {
+                       PTHREAD_INTERNAL_CRASH(ret, "Unable to deallocate stack");
+               }
        }
 }
 
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wreturn-stack-address"
 
-PTHREAD_NOINLINE
+OS_NOINLINE
 static void*
 _pthread_current_stack_address(void)
 {
@@ -679,7 +675,7 @@ _pthread_current_stack_address(void)
 
 #pragma clang diagnostic pop
 
-void
+static void
 _pthread_joiner_wake(pthread_t thread)
 {
        uint32_t *exit_gate = &thread->tl_exit_gate;
@@ -695,13 +691,31 @@ _pthread_joiner_wake(pthread_t thread)
        }
 }
 
+static void
+_pthread_dealloc_reply_port(pthread_t self)
+{
+       mach_port_t port = _pthread_tsd_slot(self, MIG_REPLY);
+       if (port != MACH_PORT_NULL) {
+               // this will also set the TSD to MACH_PORT_NULL
+               mig_dealloc_reply_port(port);
+       }
+}
+
+static void
+_pthread_dealloc_special_reply_port(pthread_t self)
+{
+       mach_port_t port = _pthread_tsd_slot(self, MACH_SPECIAL_REPLY);
+       if (port != MACH_PORT_NULL) {
+               _pthread_tsd_slot(self, MACH_SPECIAL_REPLY) = MACH_PORT_NULL;
+               thread_destruct_special_reply_port(port, THREAD_SPECIAL_REPLY_PORT_ALL);
+       }
+}
+
 // Terminates the thread if called from the currently running thread.
-PTHREAD_NORETURN PTHREAD_NOINLINE PTHREAD_NOT_TAIL_CALLED
+OS_NORETURN OS_NOINLINE OS_NOT_TAIL_CALLED
 static void
 _pthread_terminate(pthread_t t, void *exit_value)
 {
-       PTHREAD_ASSERT(t == pthread_self());
-
        _pthread_introspection_thread_terminate(t);
 
        uintptr_t freeaddr = (uintptr_t)t->freeaddr;
@@ -737,14 +751,14 @@ _pthread_terminate(pthread_t t, void *exit_value)
                freesize_stack = 0;
        }
 
-       mach_port_t kport = _pthread_kernel_thread(t);
+       mach_port_t kport = _pthread_tsd_slot(t, MACH_THREAD_SELF);
        bool keep_thread_struct = false, needs_wake = false;
        semaphore_t custom_stack_sema = MACH_PORT_NULL;
 
        _pthread_dealloc_special_reply_port(t);
        _pthread_dealloc_reply_port(t);
 
-       _PTHREAD_LOCK(_pthread_list_lock);
+       _pthread_lock_lock(&_pthread_list_lock);
 
        // This piece of code interacts with pthread_join. It will always:
        // - set tl_exit_gate to MACH_PORT_DEAD (thread exited)
@@ -771,7 +785,7 @@ _pthread_terminate(pthread_t t, void *exit_value)
                TAILQ_REMOVE(&__pthread_head, t, tl_plist);
        }
 
-       _PTHREAD_UNLOCK(_pthread_list_lock);
+       _pthread_lock_unlock(&_pthread_list_lock);
 
        if (needs_wake) {
                // When we found a waiter, we want to drop the very contended list lock
@@ -782,12 +796,12 @@ _pthread_terminate(pthread_t t, void *exit_value)
                // - the exiting thread tries to set tl_joiner_cleans_up to true
                // Whoever does it first commits the other guy to cleanup the pthread_t
                _pthread_joiner_wake(t);
-               _PTHREAD_LOCK(_pthread_list_lock);
+               _pthread_lock_lock(&_pthread_list_lock);
                if (t->tl_join_ctx) {
                        t->tl_joiner_cleans_up = true;
                        keep_thread_struct = true;
                }
-               _PTHREAD_UNLOCK(_pthread_list_lock);
+               _pthread_lock_unlock(&_pthread_list_lock);
        }
 
        //
@@ -813,7 +827,7 @@ _pthread_terminate(pthread_t t, void *exit_value)
        PTHREAD_INTERNAL_CRASH(t, "thread didn't terminate");
 }
 
-PTHREAD_NORETURN
+OS_NORETURN
 static void
 _pthread_terminate_invoke(pthread_t t, void *exit_value)
 {
@@ -839,62 +853,47 @@ _pthread_terminate_invoke(pthread_t t, void *exit_value)
 
 #pragma mark pthread start / body
 
-/*
- * Create and start execution of a new thread.
- */
-PTHREAD_NOINLINE PTHREAD_NORETURN
-static void
-_pthread_body(pthread_t self, bool needs_tsd_base_set)
-{
-       _pthread_set_self_internal(self, needs_tsd_base_set);
-       __pthread_started_thread(self);
-       _pthread_exit(self, (self->fun)(self->arg));
-}
-
-PTHREAD_NORETURN
 void
 _pthread_start(pthread_t self, mach_port_t kport,
                __unused void *(*fun)(void *), __unused void *arg,
                __unused size_t stacksize, unsigned int pflags)
 {
-       bool thread_tsd_bsd_set = (bool)(pflags & PTHREAD_START_TSD_BASE_SET);
-
        if (os_unlikely(pflags & PTHREAD_START_SUSPENDED)) {
-               PTHREAD_INTERNAL_CRASH(0,
+               PTHREAD_INTERNAL_CRASH(pflags,
                                "kernel without PTHREAD_START_SUSPENDED support");
        }
-#if DEBUG
-       PTHREAD_ASSERT(MACH_PORT_VALID(kport));
-       PTHREAD_ASSERT(_pthread_kernel_thread(self) == kport);
-#endif
-       // will mark the thread initialized
+       if (os_unlikely((pflags & PTHREAD_START_TSD_BASE_SET) == 0)) {
+               PTHREAD_INTERNAL_CRASH(pflags,
+                               "thread_set_tsd_base() wasn't called by the kernel");
+       }
+       PTHREAD_DEBUG_ASSERT(MACH_PORT_VALID(kport));
+       PTHREAD_DEBUG_ASSERT(_pthread_tsd_slot(self, MACH_THREAD_SELF) == kport);
+       _pthread_validate_signature(self);
        _pthread_markcancel_if_canceled(self, kport);
 
-       _pthread_body(self, !thread_tsd_bsd_set);
+       _pthread_set_self_internal(self);
+       __pthread_started_thread(self);
+       _pthread_exit(self, (self->fun)(self->arg));
 }
 
-PTHREAD_ALWAYS_INLINE
+OS_ALWAYS_INLINE
 static inline void
 _pthread_struct_init(pthread_t t, const pthread_attr_t *attrs,
                void *stackaddr, size_t stacksize, void *freeaddr, size_t freesize)
 {
-#if DEBUG
-       PTHREAD_ASSERT(t->sig != _PTHREAD_SIG);
-#endif
-
-       t->sig = _PTHREAD_SIG;
-       t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = t;
-       t->tsd[_PTHREAD_TSD_SLOT_ERRNO] = &t->err_no;
+       _pthread_init_signature(t);
+       _pthread_tsd_slot(t, PTHREAD_SELF) = t;
+       _pthread_tsd_slot(t, ERRNO) = &t->err_no;
        if (attrs->schedset == 0) {
-               t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = attrs->qosclass;
+               _pthread_tsd_slot(t, PTHREAD_QOS_CLASS) = attrs->qosclass;
        } else {
-               t->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] =
+               _pthread_tsd_slot(t, PTHREAD_QOS_CLASS) =
                                _pthread_unspecified_priority();
        }
-       t->tsd[_PTHREAD_TSD_SLOT_PTR_MUNGE] = _pthread_ptr_munge_token;
+       _pthread_tsd_slot(t, PTR_MUNGE) = _pthread_ptr_munge_token;
        t->tl_has_custom_stack = (attrs->stackaddr != NULL);
 
-       _PTHREAD_LOCK_INIT(t->lock);
+       _pthread_lock_init(&t->lock);
 
        t->stackaddr = stackaddr;
        t->stackbottom = stackaddr - stacksize;
@@ -912,13 +911,6 @@ _pthread_struct_init(pthread_t t, const pthread_attr_t *attrs,
 
 #pragma mark pthread public interface
 
-/* Need to deprecate this in future */
-int
-_pthread_is_threaded(void)
-{
-       return __is_threaded;
-}
-
 /* Non portable public api to know whether this process has(had) atleast one thread
  * apart from main thread. There could be race if there is a thread in the process of
  * creation at the time of call . It does not tell whether there are more than one thread
@@ -930,8 +922,6 @@ pthread_is_threaded_np(void)
        return __is_threaded;
 }
 
-
-PTHREAD_NOEXPORT_VARIANT
 mach_port_t
 pthread_mach_thread_np(pthread_t t)
 {
@@ -940,38 +930,35 @@ pthread_mach_thread_np(pthread_t t)
        return kport;
 }
 
-PTHREAD_NOEXPORT_VARIANT
 pthread_t
 pthread_from_mach_thread_np(mach_port_t kernel_thread)
 {
-       struct _pthread *p = NULL;
+       pthread_t p = NULL;
 
        /* No need to wait as mach port is already known */
-       _PTHREAD_LOCK(_pthread_list_lock);
+       _pthread_lock_lock(&_pthread_list_lock);
 
        TAILQ_FOREACH(p, &__pthread_head, tl_plist) {
-               if (_pthread_kernel_thread(p) == kernel_thread) {
+               if (_pthread_tsd_slot(p, MACH_THREAD_SELF) == kernel_thread) {
                        break;
                }
        }
 
-       _PTHREAD_UNLOCK(_pthread_list_lock);
+       _pthread_lock_unlock(&_pthread_list_lock);
 
        return p;
 }
 
-PTHREAD_NOEXPORT_VARIANT
 size_t
 pthread_get_stacksize_np(pthread_t t)
 {
        size_t size = 0;
-       size_t stacksize = t->stackaddr - t->stackbottom;
 
        if (t == NULL) {
                return ESRCH; // XXX bug?
        }
 
-#if !defined(__arm__) && !defined(__arm64__)
+#if TARGET_OS_OSX
        // The default rlimit based allocations will be provided with a stacksize
        // of the current limit and a freesize of the max.  However, custom
        // allocations will just have the guard page to free.  If we aren't in the
@@ -982,37 +969,40 @@ pthread_get_stacksize_np(pthread_t t)
        //
        // Of course, on arm rlim_cur == rlim_max and there's only the one guard
        // page.  So, we can skip all this there.
-       if (t == main_thread() && stacksize + vm_page_size != t->freesize) {
-               // We want to call getrlimit() just once, as it's relatively expensive
-               static size_t rlimit_stack;
+       if (t == main_thread()) {
+               size_t stacksize = t->stackaddr - t->stackbottom;
+
+               if (stacksize + vm_page_size != t->freesize) {
+                       // We want to call getrlimit() just once, as it's relatively
+                       // expensive
+                       static size_t rlimit_stack;
 
-               if (rlimit_stack == 0) {
-                       struct rlimit limit;
-                       int ret = getrlimit(RLIMIT_STACK, &limit);
+                       if (rlimit_stack == 0) {
+                               struct rlimit limit;
+                               int ret = getrlimit(RLIMIT_STACK, &limit);
 
-                       if (ret == 0) {
-                               rlimit_stack = (size_t) limit.rlim_cur;
+                               if (ret == 0) {
+                                       rlimit_stack = (size_t) limit.rlim_cur;
+                               }
                        }
-               }
 
-               if (rlimit_stack == 0 || rlimit_stack > t->freesize) {
-                       return stacksize;
-               } else {
-                       return rlimit_stack;
+                       if (rlimit_stack == 0 || rlimit_stack > t->freesize) {
+                               return stacksize;
+                       } else {
+                               return round_page(rlimit_stack);
+                       }
                }
        }
-#endif /* !defined(__arm__) && !defined(__arm64__) */
+#endif /* TARGET_OS_OSX */
 
        if (t == pthread_self() || t == main_thread()) {
-               size = stacksize;
+               size = t->stackaddr - t->stackbottom;;
                goto out;
        }
 
        if (_pthread_validate_thread_and_list_lock(t)) {
-               size = stacksize;
-               _PTHREAD_UNLOCK(_pthread_list_lock);
-       } else {
-               size = ESRCH; // XXX bug?
+               size = t->stackaddr - t->stackbottom;;
+               _pthread_lock_unlock(&_pthread_list_lock);
        }
 
 out:
@@ -1021,7 +1011,6 @@ out:
        return size ? size : DEFAULT_STACK_SIZE;
 }
 
-PTHREAD_NOEXPORT_VARIANT
 void *
 pthread_get_stackaddr_np(pthread_t t)
 {
@@ -1035,65 +1024,10 @@ pthread_get_stackaddr_np(pthread_t t)
        }
 
        void *addr = t->stackaddr;
-       _PTHREAD_UNLOCK(_pthread_list_lock);
+       _pthread_lock_unlock(&_pthread_list_lock);
        return addr;
 }
 
-
-static mach_port_t
-_pthread_reply_port(pthread_t t)
-{
-       void *p;
-       if (t == NULL) {
-               p = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_MIG_REPLY);
-       } else {
-               p = t->tsd[_PTHREAD_TSD_SLOT_MIG_REPLY];
-       }
-       return (mach_port_t)(uintptr_t)p;
-}
-
-static void
-_pthread_set_reply_port(pthread_t t, mach_port_t reply_port)
-{
-       void *p = (void *)(uintptr_t)reply_port;
-       if (t == NULL) {
-               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_MIG_REPLY, p);
-       } else {
-               t->tsd[_PTHREAD_TSD_SLOT_MIG_REPLY] = p;
-       }
-}
-
-static void
-_pthread_dealloc_reply_port(pthread_t t)
-{
-       mach_port_t reply_port = _pthread_reply_port(t);
-       if (reply_port != MACH_PORT_NULL) {
-               mig_dealloc_reply_port(reply_port);
-       }
-}
-
-static mach_port_t
-_pthread_special_reply_port(pthread_t t)
-{
-       void *p;
-       if (t == NULL) {
-               p = _pthread_getspecific_direct(_PTHREAD_TSD_SLOT_MACH_SPECIAL_REPLY);
-       } else {
-               p = t->tsd[_PTHREAD_TSD_SLOT_MACH_SPECIAL_REPLY];
-       }
-       return (mach_port_t)(uintptr_t)p;
-}
-
-static void
-_pthread_dealloc_special_reply_port(pthread_t t)
-{
-       mach_port_t special_reply_port = _pthread_special_reply_port(t);
-       if (special_reply_port != MACH_PORT_NULL) {
-               thread_destruct_special_reply_port(special_reply_port,
-                               THREAD_SPECIAL_REPLY_PORT_ALL);
-       }
-}
-
 pthread_t
 pthread_main_thread_np(void)
 {
@@ -1107,13 +1041,33 @@ pthread_main_np(void)
        return pthread_self() == main_thread();
 }
 
+static int
+_pthread_threadid_slow(pthread_t thread, uint64_t *thread_id)
+{
+       unsigned int info_count = THREAD_IDENTIFIER_INFO_COUNT;
+       mach_port_t thport = _pthread_tsd_slot(thread, MACH_THREAD_SELF);
+       struct thread_identifier_info info;
+       kern_return_t kr;
+
+       kr = thread_info(thport, THREAD_IDENTIFIER_INFO,
+                       (thread_info_t)&info, &info_count);
+       if (kr == KERN_SUCCESS && info.thread_id) {
+               *thread_id = info.thread_id;
+#if __LP64__
+               os_atomic_store(&thread->thread_id, info.thread_id, relaxed);
+#else
+               os_atomic_store_wide(&thread->thread_id, info.thread_id, relaxed);
+#endif
+               return 0;
+       }
+       return EINVAL;
+}
 
 /*
  * if we are passed in a pthread_t that is NULL, then we return the current
  * thread's thread_id. So folks don't have to call pthread_self, in addition to
  * us doing it, if they just want their thread_id.
  */
-PTHREAD_NOEXPORT_VARIANT
 int
 pthread_threadid_np(pthread_t thread, uint64_t *thread_id)
 {
@@ -1129,17 +1083,33 @@ pthread_threadid_np(pthread_t thread, uint64_t *thread_id)
        } else if (!_pthread_validate_thread_and_list_lock(thread)) {
                res = ESRCH;
        } else {
-               if (thread->thread_id == 0) {
-                       res = EINVAL;
-               } else {
-                       *thread_id = thread->thread_id;
+#if __LP64__
+               *thread_id = os_atomic_load(&thread->thread_id, relaxed);
+#else
+               *thread_id = os_atomic_load_wide(&thread->thread_id, relaxed);
+#endif
+               if (os_unlikely(*thread_id == 0)) {
+                       // there is a race at init because the thread sets its own TID.
+                       // correct this by asking mach
+                       res = _pthread_threadid_slow(thread, thread_id);
                }
-               _PTHREAD_UNLOCK(_pthread_list_lock);
+               _pthread_lock_unlock(&_pthread_list_lock);
        }
        return res;
 }
 
-PTHREAD_NOEXPORT_VARIANT
+int
+pthread_cpu_number_np(size_t *cpu_id)
+{
+       if (cpu_id == NULL)  {
+               errno = EINVAL;
+               return errno;
+       }
+
+       *cpu_id = _os_cpu_number();
+       return 0;
+}
+
 int
 pthread_getname_np(pthread_t thread, char *threadname, size_t len)
 {
@@ -1153,11 +1123,10 @@ pthread_getname_np(pthread_t thread, char *threadname, size_t len)
        }
 
        strlcpy(threadname, thread->pthread_name, len);
-       _PTHREAD_UNLOCK(_pthread_list_lock);
+       _pthread_lock_unlock(&_pthread_list_lock);
        return 0;
 }
 
-
 int
 pthread_setname_np(const char *name)
 {
@@ -1169,7 +1138,8 @@ pthread_setname_np(const char *name)
                len = strlen(name);
        }
 
-       /* protytype is in pthread_internals.h */
+       _pthread_validate_signature(self);
+
        res = __proc_info(5, getpid(), 2, (uint64_t)0, (void*)name, (int)len);
        if (res == 0) {
                if (len > 0) {
@@ -1182,24 +1152,37 @@ pthread_setname_np(const char *name)
 
 }
 
-PTHREAD_ALWAYS_INLINE
-static inline void
-__pthread_add_thread(pthread_t t, bool from_mach_thread)
+#if TARGET_OS_OSX
+
+void
+pthread_jit_write_protect_np(int enable)
 {
-       if (from_mach_thread) {
-               _PTHREAD_LOCK_FROM_MACH_THREAD(_pthread_list_lock);
-       } else {
-               _PTHREAD_LOCK(_pthread_list_lock);
-       }
+        if (!os_thread_self_restrict_rwx_is_supported()) {
+                return;
+        }
 
+        if (enable) {
+                os_thread_self_restrict_rwx_to_rx();
+        } else {
+                os_thread_self_restrict_rwx_to_rw();
+        }
+}
+
+int pthread_jit_write_protect_supported_np()
+{
+       return os_thread_self_restrict_rwx_is_supported();
+}
+
+#endif // TARGET_OS_OSX
+
+OS_ALWAYS_INLINE
+static inline void
+__pthread_add_thread(pthread_t t, mach_port_t self, bool from_mach_thread)
+{
+       _pthread_lock_lock(&_pthread_list_lock, self);
        TAILQ_INSERT_TAIL(&__pthread_head, t, tl_plist);
        _pthread_count++;
-
-       if (from_mach_thread) {
-               _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock);
-       } else {
-               _PTHREAD_UNLOCK(_pthread_list_lock);
-       }
+       _pthread_lock_unlock(&_pthread_list_lock, self);
 
        if (!from_mach_thread) {
                // PR-26275485: Mach threads will likely crash trying to run
@@ -1210,32 +1193,22 @@ __pthread_add_thread(pthread_t t, bool from_mach_thread)
        }
 }
 
-PTHREAD_ALWAYS_INLINE
+OS_ALWAYS_INLINE
 static inline void
-__pthread_undo_add_thread(pthread_t t, bool from_mach_thread)
+__pthread_undo_add_thread(pthread_t t, mach_port_t self)
 {
-       if (from_mach_thread) {
-               _PTHREAD_LOCK_FROM_MACH_THREAD(_pthread_list_lock);
-       } else {
-               _PTHREAD_LOCK(_pthread_list_lock);
-       }
-
+       _pthread_lock_lock(&_pthread_list_lock, self);
        TAILQ_REMOVE(&__pthread_head, t, tl_plist);
        _pthread_count--;
-
-       if (from_mach_thread) {
-               _PTHREAD_UNLOCK_FROM_MACH_THREAD(_pthread_list_lock);
-       } else {
-               _PTHREAD_UNLOCK(_pthread_list_lock);
-       }
+       _pthread_lock_unlock(&_pthread_list_lock, self);
 }
 
-PTHREAD_ALWAYS_INLINE
+OS_ALWAYS_INLINE
 static inline void
 __pthread_started_thread(pthread_t t)
 {
-       mach_port_t kport = _pthread_kernel_thread(t);
-       if (os_slowpath(!MACH_PORT_VALID(kport))) {
+       mach_port_t kport = _pthread_tsd_slot(t, MACH_THREAD_SELF);
+       if (os_unlikely(!MACH_PORT_VALID(kport))) {
                PTHREAD_CLIENT_CRASH(kport,
                                "Unable to allocate thread port, possible port leak");
        }
@@ -1253,6 +1226,8 @@ _pthread_create(pthread_t *thread, const pthread_attr_t *attrs,
        pthread_t t = NULL;
        void *stack = NULL;
        bool from_mach_thread = (create_flags & _PTHREAD_CREATE_FROM_MACH_THREAD);
+       mach_port_t self_kport;
+       int rc = 0;
 
        if (attrs == NULL) {
                attrs = &_pthread_attr_default;
@@ -1277,14 +1252,20 @@ _pthread_create(pthread_t *thread, const pthread_attr_t *attrs,
 
        __is_threaded = 1;
 
-       t =_pthread_allocate(attrs, &stack);
+       t = _pthread_allocate(attrs, &stack, from_mach_thread);
        if (t == NULL) {
                return EAGAIN;
        }
 
+       if (os_unlikely(from_mach_thread)) {
+               self_kport = mach_thread_self();
+       } else {
+               self_kport = _pthread_mach_thread_self_direct();
+       }
+
        t->arg = arg;
        t->fun = start_routine;
-       __pthread_add_thread(t, from_mach_thread);
+       __pthread_add_thread(t, self_kport, from_mach_thread);
 
        if (__bsdthread_create(start_routine, arg, stack, t, flags) ==
                        (pthread_t)-1) {
@@ -1292,18 +1273,18 @@ _pthread_create(pthread_t *thread, const pthread_attr_t *attrs,
                        PTHREAD_CLIENT_CRASH(0,
                                        "Unable to allocate thread port, possible port leak");
                }
-               __pthread_undo_add_thread(t, from_mach_thread);
+               __pthread_undo_add_thread(t, self_kport);
                _pthread_deallocate(t, from_mach_thread);
-               return EAGAIN;
+               t = NULL;
+               rc = EAGAIN;
        }
-
-       if (create_flags & _PTHREAD_CREATE_SUSPENDED) {
-               _pthread_markcancel_if_canceled(t, _pthread_kernel_thread(t));
+       if (from_mach_thread) {
+               mach_port_deallocate(mach_task_self(), self_kport);
        }
 
        // n.b. if a thread is created detached and exits, t will be invalid
        *thread = t;
-       return 0;
+       return rc;
 }
 
 int
@@ -1322,76 +1303,14 @@ pthread_create_from_mach_thread(pthread_t *thread, const pthread_attr_t *attr,
        return _pthread_create(thread, attr, start_routine, arg, flags);
 }
 
-#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288
-/* Functions defined in machine-dependent files. */
-PTHREAD_NOEXPORT void _pthread_setup_suspended(pthread_t th, void (*f)(pthread_t), void *sp);
-
-PTHREAD_NORETURN
-static void
-_pthread_suspended_body(pthread_t self)
-{
-       _pthread_set_self(self);
-       __pthread_started_thread(self);
-       _pthread_exit(self, (self->fun)(self->arg));
-}
-
-static int
-_pthread_create_suspended_np(pthread_t *thread, const pthread_attr_t *attrs,
-               void *(*start_routine)(void *), void *arg)
-{
-       pthread_t t;
-       void *stack;
-       mach_port_t kernel_thread = MACH_PORT_NULL;
-
-       if (attrs == NULL) {
-               attrs = &_pthread_attr_default;
-       } else if (attrs->sig != _PTHREAD_ATTR_SIG) {
-               return EINVAL;
-       }
-
-       t = _pthread_allocate(attrs, &stack);
-       if (t == NULL) {
-               return EAGAIN;
-       }
-
-       if (thread_create(mach_task_self(), &kernel_thread) != KERN_SUCCESS) {
-               _pthread_deallocate(t, false);
-               return EAGAIN;
-       }
-
-       _pthread_set_kernel_thread(t, kernel_thread);
-       (void)pthread_setschedparam_internal(t, kernel_thread,
-                       t->tl_policy, &t->tl_param);
-
-       __is_threaded = 1;
-
-       t->arg = arg;
-       t->fun = start_routine;
-       t->cancel_state |= _PTHREAD_CANCEL_INITIALIZED;
-       __pthread_add_thread(t, false);
-
-       // Set up a suspended thread.
-       _pthread_setup_suspended(t, _pthread_suspended_body, stack);
-       *thread = t;
-       return 0;
-}
-#endif // !defined(__OPEN_SOURCE__) && TARGET_OS_OSX
-
 int
 pthread_create_suspended_np(pthread_t *thread, const pthread_attr_t *attr,
                void *(*start_routine)(void *), void *arg)
 {
-#if !defined(__OPEN_SOURCE__) && TARGET_OS_OSX // 40703288
-       if (_os_xbs_chrooted) {
-               return _pthread_create_suspended_np(thread, attr, start_routine, arg);
-       }
-#endif
        unsigned int flags = _PTHREAD_CREATE_SUSPENDED;
        return _pthread_create(thread, attr, start_routine, arg, flags);
 }
 
-
-PTHREAD_NOEXPORT_VARIANT
 int
 pthread_detach(pthread_t thread)
 {
@@ -1414,7 +1333,7 @@ pthread_detach(pthread_t thread)
                        wake = true;
                }
        }
-       _PTHREAD_UNLOCK(_pthread_list_lock);
+       _pthread_lock_unlock(&_pthread_list_lock);
 
        if (join) {
                pthread_join(thread, NULL);
@@ -1424,7 +1343,6 @@ pthread_detach(pthread_t thread)
        return res;
 }
 
-PTHREAD_NOEXPORT_VARIANT
 int
 pthread_kill(pthread_t th, int sig)
 {
@@ -1433,13 +1351,10 @@ pthread_kill(pthread_t th, int sig)
        }
 
        mach_port_t kport = MACH_PORT_NULL;
-       if (!_pthread_is_valid(th, &kport)) {
-               return ESRCH; // Not a valid thread.
-       }
-
-       // Don't signal workqueue threads.
-       if (th->wqthread != 0 && th->wqkillset == 0) {
-               return ENOTSUP;
+       {
+               if (!_pthread_is_valid(th, &kport)) {
+                       return ESRCH;
+               }
        }
 
        int ret = __pthread_kill(kport, sig);
@@ -1450,34 +1365,19 @@ pthread_kill(pthread_t th, int sig)
        return ret;
 }
 
-PTHREAD_NOEXPORT_VARIANT
 int
 __pthread_workqueue_setkill(int enable)
 {
-       pthread_t self = pthread_self();
-
-       _PTHREAD_LOCK(self->lock);
-       self->wqkillset = enable ? 1 : 0;
-       _PTHREAD_UNLOCK(self->lock);
-
-       return 0;
-}
-
-
-/* For compatibility... */
-
-pthread_t
-_pthread_self(void)
-{
-       return pthread_self();
+       {
+               return __bsdthread_ctl(BSDTHREAD_CTL_WORKQ_ALLOW_KILL, enable, 0, 0);
+       }
 }
 
 /*
  * Terminate a thread.
  */
-extern int __disable_threadsignal(int);
 
-PTHREAD_NORETURN
+OS_NORETURN
 static void
 _pthread_exit(pthread_t self, void *exit_value)
 {
@@ -1509,11 +1409,17 @@ pthread_exit(void *exit_value)
                PTHREAD_CLIENT_CRASH(0, "pthread_exit() called from a thread "
                                "not created by pthread_create()");
        }
+       _pthread_validate_signature(self);
        _pthread_exit(self, exit_value);
 }
 
+int
+pthread_self_is_exiting_np(void)
+{
+       return (os_atomic_load(&pthread_self()->cancel_state, relaxed) &
+                       _PTHREAD_CANCEL_EXITING) != 0;
+}
 
-PTHREAD_NOEXPORT_VARIANT
 int
 pthread_getschedparam(pthread_t thread, int *policy, struct sched_param *param)
 {
@@ -1523,13 +1429,11 @@ pthread_getschedparam(pthread_t thread, int *policy, struct sched_param *param)
 
        if (policy) *policy = thread->tl_policy;
        if (param) *param = thread->tl_param;
-       _PTHREAD_UNLOCK(_pthread_list_lock);
+       _pthread_lock_unlock(&_pthread_list_lock);
        return 0;
 }
 
-
-
-PTHREAD_ALWAYS_INLINE
+OS_ALWAYS_INLINE
 static inline int
 pthread_setschedparam_internal(pthread_t thread, mach_port_t kport, int policy,
                const struct sched_param *param)
@@ -1539,32 +1443,35 @@ pthread_setschedparam_internal(pthread_t thread, mach_port_t kport, int policy,
        mach_msg_type_number_t count;
        kern_return_t ret;
 
+       if (os_unlikely(thread->wqthread)) {
+               return ENOTSUP;
+       }
+
        switch (policy) {
-               case SCHED_OTHER:
-                       bases.ts.base_priority = param->sched_priority;
-                       base = (policy_base_t)&bases.ts;
-                       count = POLICY_TIMESHARE_BASE_COUNT;
-                       break;
-               case SCHED_FIFO:
-                       bases.fifo.base_priority = param->sched_priority;
-                       base = (policy_base_t)&bases.fifo;
-                       count = POLICY_FIFO_BASE_COUNT;
-                       break;
-               case SCHED_RR:
-                       bases.rr.base_priority = param->sched_priority;
-                       /* quantum isn't public yet */
-                       bases.rr.quantum = param->quantum;
-                       base = (policy_base_t)&bases.rr;
-                       count = POLICY_RR_BASE_COUNT;
-                       break;
-               default:
-                       return EINVAL;
+       case SCHED_OTHER:
+               bases.ts.base_priority = param->sched_priority;
+               base = (policy_base_t)&bases.ts;
+               count = POLICY_TIMESHARE_BASE_COUNT;
+               break;
+       case SCHED_FIFO:
+               bases.fifo.base_priority = param->sched_priority;
+               base = (policy_base_t)&bases.fifo;
+               count = POLICY_FIFO_BASE_COUNT;
+               break;
+       case SCHED_RR:
+               bases.rr.base_priority = param->sched_priority;
+               /* quantum isn't public yet */
+               bases.rr.quantum = param->quantum;
+               base = (policy_base_t)&bases.rr;
+               count = POLICY_RR_BASE_COUNT;
+               break;
+       default:
+               return EINVAL;
        }
        ret = thread_policy(kport, policy, base, count, TRUE);
        return (ret != KERN_SUCCESS) ? EINVAL : 0;
 }
 
-PTHREAD_NOEXPORT_VARIANT
 int
 pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param)
 {
@@ -1573,7 +1480,8 @@ pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param)
 
        // since the main thread will not get de-allocated from underneath us
        if (t == pthread_self() || t == main_thread()) {
-               kport = _pthread_kernel_thread(t);
+               _pthread_validate_signature(t);
+               kport = _pthread_tsd_slot(t, MACH_THREAD_SELF);
        } else {
                bypass = 0;
                if (!_pthread_is_valid(t, &kport)) {
@@ -1585,7 +1493,7 @@ pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param)
        if (res) return res;
 
        if (bypass) {
-               _PTHREAD_LOCK(_pthread_list_lock);
+               _pthread_lock_lock(&_pthread_list_lock);
        } else if (!_pthread_validate_thread_and_list_lock(t)) {
                // Ensure the thread is still valid.
                return ESRCH;
@@ -1593,11 +1501,10 @@ pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param)
 
        t->tl_policy = policy;
        t->tl_param = *param;
-       _PTHREAD_UNLOCK(_pthread_list_lock);
+       _pthread_lock_unlock(&_pthread_list_lock);
        return 0;
 }
 
-
 int
 sched_get_priority_min(int policy)
 {
@@ -1616,11 +1523,7 @@ pthread_equal(pthread_t t1, pthread_t t2)
        return (t1 == t2);
 }
 
-/*
- * Force LLVM not to optimise this to a call to __pthread_set_self, if it does
- * then _pthread_set_self won't be bound when secondary threads try and start up.
- */
-PTHREAD_NOINLINE
+OS_NOINLINE
 void
 _pthread_set_self(pthread_t p)
 {
@@ -1629,15 +1532,16 @@ _pthread_set_self(pthread_t p)
                return _pthread_set_self_dyld();
        }
 #endif // VARIANT_DYLD
-       _pthread_set_self_internal(p, true);
+       _pthread_set_self_internal(p);
+       _thread_set_tsd_base(&p->tsd[0]);
 }
 
 #if VARIANT_DYLD
 // _pthread_set_self_dyld is noinline+noexport to allow the option for
 // static libsyscall to adopt this as the entry point from mach_init if
 // desired
-PTHREAD_NOINLINE PTHREAD_NOEXPORT
-void
+OS_NOINLINE
+static void
 _pthread_set_self_dyld(void)
 {
        pthread_t p = main_thread();
@@ -1652,30 +1556,29 @@ _pthread_set_self_dyld(void)
        // this, TSD access will fail and crash if it uses bits of Libc prior to
        // library initialization. __pthread_init will finish the initialization
        // during library init.
-       p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_SELF] = p;
-       p->tsd[_PTHREAD_TSD_SLOT_ERRNO] = &p->err_no;
+       _pthread_tsd_slot(p, PTHREAD_SELF) = p;
+       _pthread_tsd_slot(p, ERRNO) = &p->err_no;
        _thread_set_tsd_base(&p->tsd[0]);
 }
 #endif // VARIANT_DYLD
 
-PTHREAD_ALWAYS_INLINE
+OS_ALWAYS_INLINE
 static inline void
-_pthread_set_self_internal(pthread_t p, bool needs_tsd_base_set)
+_pthread_set_self_internal(pthread_t p)
 {
-       p->thread_id = __thread_selfid();
+#if __LP64__
+       os_atomic_store(&p->thread_id, __thread_selfid(), relaxed);
+#else
+       os_atomic_store_wide(&p->thread_id, __thread_selfid(), relaxed);
+#endif
 
        if (os_unlikely(p->thread_id == -1ull)) {
                PTHREAD_INTERNAL_CRASH(0, "failed to set thread_id");
        }
-
-       if (needs_tsd_base_set) {
-               _thread_set_tsd_base(&p->tsd[0]);
-       }
 }
 
-
 // <rdar://problem/28984807> pthread_once should have an acquire barrier
-PTHREAD_ALWAYS_INLINE
+OS_ALWAYS_INLINE
 static inline void
 _os_once_acquire(os_once_t *predicate, void *context, os_function_t function)
 {
@@ -1700,7 +1603,6 @@ __pthread_once_handler(void *context)
        ctx->pthread_once->sig = _PTHREAD_ONCE_SIG;
 }
 
-PTHREAD_NOEXPORT_VARIANT
 int
 pthread_once(pthread_once_t *once_control, void (*init_routine)(void))
 {
@@ -1711,7 +1613,6 @@ pthread_once(pthread_once_t *once_control, void (*init_routine)(void))
        return 0;
 }
 
-
 int
 pthread_getconcurrency(void)
 {
@@ -1728,30 +1629,9 @@ pthread_setconcurrency(int new_level)
        return 0;
 }
 
-#if !defined(VARIANT_STATIC)
-void *
-malloc(size_t sz)
-{
-       if (_pthread_malloc) {
-               return _pthread_malloc(sz);
-       } else {
-               return NULL;
-       }
-}
-
-void
-free(void *p)
-{
-       if (_pthread_free) {
-               _pthread_free(p);
-       }
-}
-#endif // VARIANT_STATIC
-
 /*
  * Perform package initialization - called automatically when application starts
  */
-struct ProgramVars; /* forward reference */
 
 #if !VARIANT_DYLD
 static unsigned long
@@ -1816,19 +1696,51 @@ static void
 parse_ptr_munge_params(const char *envp[], const char *apple[])
 {
        const char *p, *s;
+       uintptr_t token = 0;
        p = _simple_getenv(apple, "ptr_munge");
        if (p) {
-               _pthread_ptr_munge_token = _pthread_strtoul(p, &s, 16);
+               token = _pthread_strtoul(p, &s, 16);
                bzero((char *)p, strlen(p));
        }
+       /*
+        * In DEBUG we allow the environment variable to override the kernel
+        * security setting, including setting it to 0 which is helpful during
+        * debugging sessions.
+        *
+        * For other cases, the token must be set by the kernel or the environment
+        * variable to a non 0 value.
+        */
 #if !DEBUG
-       if (_pthread_ptr_munge_token) return;
+       if (!token) {
 #endif
-       p = _simple_getenv(envp, "PTHREAD_PTR_MUNGE_TOKEN");
-       if (p) {
-               uintptr_t t = _pthread_strtoul(p, &s, 16);
-               if (t) _pthread_ptr_munge_token = t;
+               p = _simple_getenv(envp, "PTHREAD_PTR_MUNGE_TOKEN");
+               if (p) {
+                       uintptr_t t = _pthread_strtoul(p, &s, 16);
+                       if (t) token = t;
+               }
+#if !DEBUG
+       }
+
+       if (!token) {
+               PTHREAD_INTERNAL_CRASH(token, "Token from the kernel is 0");
        }
+#endif // !DEBUG
+
+       _pthread_ptr_munge_token = token;
+       // We need to refresh the main thread signature now that the munge
+       // token has changed; do it here, before TSAN can observe the thread.
+       _pthread_init_signature(_main_thread_ptr);
+}
+
+static void
+parse_main_thread_port(const char *apple[], mach_port_name_t *main_th)
+{
+       const char *p, *s;
+       p = _simple_getenv(apple, "th_port");
+       if (p) {
+               *main_th = (mach_port_name_t)_pthread_strtoul(p, &s, 16);
+               bzero((char *)p, strlen(p));
+       }
 }
 
 int
@@ -1846,6 +1758,17 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs,
                }
        }
 
+       // libpthread.a in dyld "owns" the main thread structure itself and sets
+       // up the tsd to point to it. So take the pthread_self() from there
+       // and make it our main thread pointer.
+       pthread_t thread = _pthread_self_direct();
+       if (os_unlikely(thread == NULL)) {
+               PTHREAD_INTERNAL_CRASH(0, "PTHREAD_SELF TSD not initialized");
+       }
+       _main_thread_ptr = thread;
+       // this needs to be done early so that pthread_self() works in TSAN
+       _pthread_init_signature(thread);
+
        //
        // Get host information
        //
@@ -1894,27 +1817,23 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs,
        // Initialize random ptr_munge token from the kernel.
        parse_ptr_munge_params(envp, apple);
 
-       // libpthread.a in dyld "owns" the main thread structure itself and sets
-       // up the tsd to point to it. So take the pthread_self() from there
-       // and make it our main thread point.
-       pthread_t thread = (pthread_t)_pthread_getspecific_direct(
-                       _PTHREAD_TSD_SLOT_PTHREAD_SELF);
-       PTHREAD_ASSERT(thread);
-       _main_thread_ptr = thread;
-
-       PTHREAD_ASSERT(_pthread_attr_default.qosclass ==
+       PTHREAD_DEBUG_ASSERT(_pthread_attr_default.qosclass ==
                        _pthread_default_priority(0));
        _pthread_struct_init(thread, &_pthread_attr_default,
                        stackaddr, stacksize, allocaddr, allocsize);
        thread->tl_joinable = true;
 
+       // Get main thread port name from the kernel.
+       mach_port_name_t main_th_port = MACH_PORT_NULL;
+       parse_main_thread_port(apple, &main_th_port);
+
        // Finish initialization with common code that is reinvoked on the
        // child side of a fork.
 
        // Finishes initialization of main thread attributes.
        // Initializes the thread list and add the main thread.
        // Calls _pthread_set_self() to prepare the main thread for execution.
-       _pthread_main_thread_init(thread);
+       _pthread_main_thread_init(thread, main_th_port);
 
        struct _pthread_registration_data registration_data;
        // Set up kernel entry points with __bsdthread_register.
@@ -1937,20 +1856,25 @@ __pthread_init(const struct _libpthread_functions *pthread_funcs,
 }
 #endif // !VARIANT_DYLD
 
-PTHREAD_NOEXPORT void
-_pthread_main_thread_init(pthread_t p)
+void
+_pthread_main_thread_init(pthread_t p, mach_port_name_t main_thread_port)
 {
        TAILQ_INIT(&__pthread_head);
-       _PTHREAD_LOCK_INIT(_pthread_list_lock);
-       _PTHREAD_LOCK_INIT(p->lock);
-       _pthread_set_kernel_thread(p, mach_thread_self());
-       _pthread_set_reply_port(p, mach_reply_port());
+       _pthread_lock_init(&_pthread_list_lock);
+       _pthread_lock_init(&p->lock);
        p->__cleanup_stack = NULL;
        p->tl_join_ctx = NULL;
        p->tl_exit_gate = MACH_PORT_NULL;
-       p->tsd[__TSD_SEMAPHORE_CACHE] = (void*)(uintptr_t)SEMAPHORE_NULL;
-       p->tsd[__TSD_MACH_SPECIAL_REPLY] = 0;
-       p->cancel_state |= _PTHREAD_CANCEL_INITIALIZED;
+
+       if (main_thread_port != MACH_PORT_NULL) {
+               _pthread_tsd_slot(p, MACH_THREAD_SELF) = main_thread_port;
+       } else {
+               // Can't get the thread port from the kernel, or we are forking; fall back to mach_thread_self()
+               _pthread_tsd_slot(p, MACH_THREAD_SELF) = mach_thread_self();
+       }
+       _pthread_tsd_slot(p, MIG_REPLY) = mach_reply_port();
+       _pthread_tsd_slot(p, MACH_SPECIAL_REPLY) = MACH_PORT_NULL;
+       _pthread_tsd_slot(p, SEMAPHORE_CACHE) = SEMAPHORE_NULL;
 
        // Initialize the list of threads with the new main thread.
        TAILQ_INSERT_HEAD(&__pthread_head, p, tl_plist);
@@ -1959,12 +1883,11 @@ _pthread_main_thread_init(pthread_t p)
        _pthread_introspection_thread_start(p);
 }
 
-PTHREAD_NOEXPORT
 void
 _pthread_main_thread_postfork_init(pthread_t p)
 {
-       _pthread_main_thread_init(p);
-       _pthread_set_self_internal(p, false);
+       _pthread_main_thread_init(p, MACH_PORT_NULL);
+       _pthread_set_self_internal(p);
 }
 
 int
@@ -1974,54 +1897,64 @@ sched_yield(void)
        return 0;
 }
 
-// XXX remove
-void
-cthread_yield(void)
+// Libsystem knows about this symbol and exports it to libsyscall
+int
+pthread_current_stack_contains_np(const void *addr, size_t length)
 {
-       sched_yield();
-}
+       uintptr_t begin = (uintptr_t) addr, end;
+       uintptr_t stack_base = (uintptr_t) _pthread_self_direct()->stackbottom;
+       uintptr_t stack_top = (uintptr_t) _pthread_self_direct()->stackaddr;
 
-void
-pthread_yield_np(void)
-{
-       sched_yield();
-}
+       if (stack_base == stack_top) {
+               return -ENOTSUP;
+       }
 
+       if (__builtin_add_overflow(begin, length, &end)) {
+               return -EINVAL;
+       }
 
+       return stack_base <= begin && end <= stack_top;
+}
 
 // Libsystem knows about this symbol and exports it to libsyscall
-PTHREAD_NOEXPORT_VARIANT
+
 void
-_pthread_clear_qos_tsd(mach_port_t thread_port)
+_pthread_clear_qos_tsd(mach_port_t port)
 {
-       if (thread_port == MACH_PORT_NULL || (uintptr_t)_pthread_getspecific_direct(_PTHREAD_TSD_SLOT_MACH_THREAD_SELF) == thread_port) {
+       pthread_priority_t pp = _pthread_unspecified_priority();
+
+       if (port == MACH_PORT_NULL || _pthread_mach_thread_self_direct() == port) {
                /* Clear the current thread's TSD, that can be done inline. */
-               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS,
-                               _pthread_unspecified_priority());
+               _pthread_setspecific_direct(_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS, pp);
        } else {
                pthread_t p;
 
-               _PTHREAD_LOCK(_pthread_list_lock);
+               _pthread_lock_lock(&_pthread_list_lock);
 
                TAILQ_FOREACH(p, &__pthread_head, tl_plist) {
-                       mach_port_t kp = _pthread_kernel_thread(p);
-                       if (thread_port == kp) {
-                               p->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] =
-                                               _pthread_unspecified_priority();
+                       mach_port_t kp = _pthread_tsd_slot(p, MACH_THREAD_SELF);
+                       if (port == kp) {
+                               _pthread_tsd_slot(p, PTHREAD_QOS_CLASS) = pp;
                                break;
                        }
                }
 
-               _PTHREAD_UNLOCK(_pthread_list_lock);
+               _pthread_lock_unlock(&_pthread_list_lock);
        }
 }
 
-
 #pragma mark pthread/stack_np.h public interface
 
-
 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__arm64__)
+#if __ARM64_ARCH_8_32__
+/*
+ * arm64_32 uses 64-bit sizes for the frame pointer and
+ * return address of a stack frame.
+ */
+typedef uint64_t frame_data_addr_t;
+#else
 typedef uintptr_t frame_data_addr_t;
+#endif
 
 struct frame_data {
        frame_data_addr_t frame_addr_next;
@@ -2037,25 +1970,33 @@ pthread_stack_frame_decode_np(uintptr_t frame_addr, uintptr_t *return_addr)
        struct frame_data *frame = (struct frame_data *)frame_addr;
 
        if (return_addr) {
+#if __has_feature(ptrauth_calls)
+               *return_addr = (uintptr_t)ptrauth_strip((void *)frame->ret_addr,
+                               ptrauth_key_return_address);
+#else
                *return_addr = (uintptr_t)frame->ret_addr;
+#endif /* __has_feature(ptrauth_calls) */
        }
 
+#if __has_feature(ptrauth_calls)
+       return (uintptr_t)ptrauth_strip((void *)frame->frame_addr_next,
+                       ptrauth_key_frame_pointer);
+#endif /* __has_feature(ptrauth_calls) */
        return (uintptr_t)frame->frame_addr_next;
 }
 
-
 #pragma mark pthread workqueue support routines
 
-
-PTHREAD_NOEXPORT void
+void
 _pthread_bsdthread_init(struct _pthread_registration_data *data)
 {
        bzero(data, sizeof(*data));
        data->version = sizeof(struct _pthread_registration_data);
        data->dispatch_queue_offset = __PTK_LIBDISPATCH_KEY0 * sizeof(void *);
        data->return_to_kernel_offset = __TSD_RETURN_TO_KERNEL * sizeof(void *);
-       data->tsd_offset = offsetof(struct _pthread, tsd);
+       data->tsd_offset = offsetof(struct pthread_s, tsd);
        data->mach_thread_self_offset = __TSD_MACH_THREAD_SELF * sizeof(void *);
+       data->joinable_offset_bits = CHAR_BIT * (offsetof(struct pthread_s, tl_policy) + 1);
 
        int rv = __bsdthread_register(thread_start, start_wqthread, (int)PTHREAD_SIZE,
                        (void*)data, (uintptr_t)sizeof(*data), data->dispatch_queue_offset);
@@ -2086,7 +2027,7 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data)
 
        if (_pthread_priority_thread_qos(main_qos) != THREAD_QOS_UNSPECIFIED) {
                _pthread_set_main_qos(main_qos);
-               main_thread()->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = main_qos;
+               _pthread_tsd_slot(main_thread(), PTHREAD_QOS_CLASS) = main_qos;
        }
 
        if (data->stack_addr_hint) {
@@ -2099,7 +2040,7 @@ _pthread_bsdthread_init(struct _pthread_registration_data *data)
        }
 }
 
-PTHREAD_NOINLINE
+OS_NOINLINE
 static void
 _pthread_wqthread_legacy_worker_wrap(pthread_priority_t pp)
 {
@@ -2130,7 +2071,7 @@ _pthread_wqthread_legacy_worker_wrap(pthread_priority_t pp)
        PTHREAD_INTERNAL_CRASH(pp, "Invalid pthread priority for the legacy interface");
 }
 
-PTHREAD_ALWAYS_INLINE
+OS_ALWAYS_INLINE
 static inline pthread_priority_t
 _pthread_wqthread_priority(int flags)
 {
@@ -2159,7 +2100,7 @@ _pthread_wqthread_priority(int flags)
        return pp;
 }
 
-PTHREAD_NOINLINE
+OS_NOINLINE
 static void
 _pthread_wqthread_setup(pthread_t self, mach_port_t kport, void *stacklowaddr,
                int flags)
@@ -2171,32 +2112,34 @@ _pthread_wqthread_setup(pthread_t self, mach_port_t kport, void *stacklowaddr,
                        PTHREAD_ALLOCADDR(stackaddr, stacksize),
                        PTHREAD_ALLOCSIZE(stackaddr, stacksize));
 
-       _pthread_set_kernel_thread(self, kport);
+       _pthread_tsd_slot(self, MACH_THREAD_SELF) = kport;
        self->wqthread = 1;
        self->wqkillset = 0;
        self->tl_joinable = false;
-       self->cancel_state |= _PTHREAD_CANCEL_INITIALIZED;
 
        // Update the running thread count and set childrun bit.
-       bool thread_tsd_base_set = (bool)(flags & WQ_FLAG_THREAD_TSD_BASE_SET);
-       _pthread_set_self_internal(self, !thread_tsd_base_set);
-       __pthread_add_thread(self, false);
+       if (os_unlikely((flags & WQ_FLAG_THREAD_TSD_BASE_SET) == 0)) {
+               PTHREAD_INTERNAL_CRASH(flags,
+                               "thread_set_tsd_base() wasn't called by the kernel");
+       }
+       _pthread_set_self_internal(self);
+       __pthread_add_thread(self, kport, false);
        __pthread_started_thread(self);
 }
 
-PTHREAD_NORETURN PTHREAD_NOINLINE
+OS_NORETURN OS_NOINLINE
 static void
 _pthread_wqthread_exit(pthread_t self)
 {
-       pthread_priority_t pp;
+       const thread_qos_t WORKQ_THREAD_QOS_CLEANUP = THREAD_QOS_LEGACY;
+       pthread_priority_t pp = _pthread_tsd_slot(self, PTHREAD_QOS_CLASS);
        thread_qos_t qos;
 
-       pp = (pthread_priority_t)self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS];
        qos = _pthread_priority_thread_qos(pp);
        if (qos == THREAD_QOS_UNSPECIFIED || qos > WORKQ_THREAD_QOS_CLEANUP) {
                // Reset QoS to something low for the cleanup process
                pp = _pthread_priority_make_from_thread_qos(WORKQ_THREAD_QOS_CLEANUP, 0, 0);
-               self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (void *)pp;
+               _pthread_tsd_slot(self, PTHREAD_QOS_CLASS) = pp;
        }
 
        _pthread_exit(self, NULL);
@@ -2212,36 +2155,36 @@ _pthread_wqthread(pthread_t self, mach_port_t kport, void *stacklowaddr,
        }
 
        pthread_priority_t pp;
+
        if (flags & WQ_FLAG_THREAD_OUTSIDEQOS) {
-               self->wqoutsideqos = 1;
+               self->wq_outsideqos = 1;
                pp = _pthread_priority_make_from_thread_qos(THREAD_QOS_LEGACY, 0,
                                _PTHREAD_PRIORITY_FALLBACK_FLAG);
        } else {
-               self->wqoutsideqos = 0;
+               self->wq_outsideqos = 0;
                pp = _pthread_wqthread_priority(flags);
        }
 
        self->tsd[_PTHREAD_TSD_SLOT_PTHREAD_QOS_CLASS] = (void *)pp;
 
        // avoid spills on the stack hard to keep used stack space minimal
-       if (nkevents == WORKQ_EXIT_THREAD_NKEVENT) {
-               goto exit;
+       if (os_unlikely(nkevents == WORKQ_EXIT_THREAD_NKEVENT)) {
+               _pthread_wqthread_exit(self);
        } else if (flags & WQ_FLAG_THREAD_WORKLOOP) {
+               kqueue_id_t *kqidptr = (kqueue_id_t *)keventlist - 1;
                self->fun = (void *(*)(void*))__libdispatch_workloopfunction;
-               self->wq_retop = WQOPS_THREAD_WORKLOOP_RETURN;
-               self->wq_kqid_ptr = ((kqueue_id_t *)keventlist - 1);
                self->arg = keventlist;
                self->wq_nevents = nkevents;
+               (*__libdispatch_workloopfunction)(kqidptr, &self->arg, &self->wq_nevents);
+               __workq_kernreturn(WQOPS_THREAD_WORKLOOP_RETURN, self->arg, self->wq_nevents, 0);
        } else if (flags & WQ_FLAG_THREAD_KEVENT) {
                self->fun = (void *(*)(void*))__libdispatch_keventfunction;
-               self->wq_retop = WQOPS_THREAD_KEVENT_RETURN;
-               self->wq_kqid_ptr = NULL;
                self->arg = keventlist;
                self->wq_nevents = nkevents;
+               (*__libdispatch_keventfunction)(&self->arg, &self->wq_nevents);
+               __workq_kernreturn(WQOPS_THREAD_KEVENT_RETURN, self->arg, self->wq_nevents, 0);
        } else {
                self->fun = (void *(*)(void*))__libdispatch_workerfunction;
-               self->wq_retop = WQOPS_THREAD_RETURN;
-               self->wq_kqid_ptr = NULL;
                self->arg = (void *)(uintptr_t)pp;
                self->wq_nevents = 0;
                if (os_likely(__workq_newapi)) {
@@ -2249,72 +2192,89 @@ _pthread_wqthread(pthread_t self, mach_port_t kport, void *stacklowaddr,
                } else {
                        _pthread_wqthread_legacy_worker_wrap(pp);
                }
-               goto just_return;
-       }
-
-       if (nkevents > 0) {
-kevent_errors_retry:
-               if (self->wq_retop == WQOPS_THREAD_WORKLOOP_RETURN) {
-                       ((pthread_workqueue_function_workloop_t)self->fun)
-                                       (self->wq_kqid_ptr, &self->arg, &self->wq_nevents);
-               } else {
-                       ((pthread_workqueue_function_kevent_t)self->fun)
-                                       (&self->arg, &self->wq_nevents);
-               }
-               int rc = __workq_kernreturn(self->wq_retop, self->arg, self->wq_nevents, 0);
-               if (os_unlikely(rc > 0)) {
-                       self->wq_nevents = rc;
-                       goto kevent_errors_retry;
-               }
-               if (os_unlikely(rc < 0)) {
-                       PTHREAD_INTERNAL_CRASH(self->err_no, "kevent (workloop) failed");
-               }
-       } else {
-just_return:
-               __workq_kernreturn(self->wq_retop, NULL, 0, 0);
+               __workq_kernreturn(WQOPS_THREAD_RETURN, NULL, 0, 0);
        }
 
-exit:
-       _pthread_wqthread_exit(self);
+       _os_set_crash_log_cause_and_message(self->err_no,
+                       "BUG IN LIBPTHREAD: __workq_kernreturn returned");
+       /*
+        * 52858993: we should never return but the compiler insists on outlining,
+        * so the __builtin_trap() is in _start_wqthread in pthread_asm.s
+        */
 }
 
-
 #pragma mark pthread workqueue API for libdispatch
 
-
 _Static_assert(WORKQ_KEVENT_EVENT_BUFFER_LEN == WQ_KEVENT_LIST_LEN,
                "Kernel and userland should agree on the event list size");
 
void
pthread_workqueue_setdispatchoffset_np(int offset)
{
	// Hand the dispatch queue serial-number offset straight to the kernel.
	// WQOPS_QUEUE_NEWSPISUPP records it for use by workqueue threads.
	__workq_kernreturn(WQOPS_QUEUE_NEWSPISUPP, NULL, offset, 0x00);
}
 
-static int
-pthread_workqueue_setdispatch_with_workloop_np(pthread_workqueue_function2_t queue_func,
-               pthread_workqueue_function_kevent_t kevent_func,
-               pthread_workqueue_function_workloop_t workloop_func)
+int
+pthread_workqueue_setup(struct pthread_workqueue_config *cfg, size_t cfg_size)
 {
-       int res = EBUSY;
+       int rv = EBUSY;
+       struct workq_dispatch_config wdc_cfg;
+       size_t min_size = 0;
+
+       if (cfg_size < sizeof(uint32_t)) {
+               return EINVAL;
+       }
+
+       switch (cfg->version) {
+       case 1:
+               min_size = offsetof(struct pthread_workqueue_config, queue_label_offs);
+               break;
+       case 2:
+               min_size = sizeof(struct pthread_workqueue_config);
+               break;
+       default:
+               return EINVAL;
+       }
+
+       if (!cfg || cfg_size < min_size) {
+               return EINVAL;
+       }
+
+       if (cfg->flags & ~PTHREAD_WORKQUEUE_CONFIG_SUPPORTED_FLAGS ||
+               cfg->version < PTHREAD_WORKQUEUE_CONFIG_MIN_SUPPORTED_VERSION) {
+               return ENOTSUP;
+       }
+
        if (__libdispatch_workerfunction == NULL) {
-               // Check whether the kernel supports new SPIs
-               res = __workq_kernreturn(WQOPS_QUEUE_NEWSPISUPP, NULL, __libdispatch_offset, kevent_func != NULL ? 0x01 : 0x00);
-               if (res == -1){
-                       res = ENOTSUP;
+               __workq_newapi = true;
+
+               wdc_cfg.wdc_version = WORKQ_DISPATCH_CONFIG_VERSION;
+               wdc_cfg.wdc_flags = 0;
+               wdc_cfg.wdc_queue_serialno_offs = cfg->queue_serialno_offs;
+#if WORKQ_DISPATCH_CONFIG_VERSION >= 2
+               wdc_cfg.wdc_queue_label_offs = cfg->queue_label_offs;
+#endif
+
+               // Tell the kernel about dispatch internals
+               rv = (int) __workq_kernreturn(WQOPS_SETUP_DISPATCH, &wdc_cfg, sizeof(wdc_cfg), 0);
+               if (rv == -1) {
+                       return errno;
                } else {
-                       __libdispatch_workerfunction = queue_func;
-                       __libdispatch_keventfunction = kevent_func;
-                       __libdispatch_workloopfunction = workloop_func;
+                       __libdispatch_keventfunction = cfg->kevent_cb;
+                       __libdispatch_workloopfunction = cfg->workloop_cb;
+                       __libdispatch_workerfunction = cfg->workq_cb;
 
                        // Prepare the kernel for workq action
                        (void)__workq_open();
                        if (__is_threaded == 0) {
                                __is_threaded = 1;
                        }
+
+                       return 0;
                }
        }
-       return res;
+
+       return rv;
 }
 
 int
@@ -2323,15 +2283,17 @@ _pthread_workqueue_init_with_workloop(pthread_workqueue_function2_t queue_func,
                pthread_workqueue_function_workloop_t workloop_func,
                int offset, int flags)
 {
-       if (flags != 0) {
-               return ENOTSUP;
-       }
-
-       __workq_newapi = true;
-       __libdispatch_offset = offset;
+       struct pthread_workqueue_config cfg = {
+               .version = PTHREAD_WORKQUEUE_CONFIG_VERSION,
+               .flags = 0,
+               .workq_cb = queue_func,
+               .kevent_cb = kevent_func,
+               .workloop_cb = workloop_func,
+               .queue_serialno_offs = offset,
+               .queue_label_offs = 0,
+       };
 
-       int rv = pthread_workqueue_setdispatch_with_workloop_np(queue_func, kevent_func, workloop_func);
-       return rv;
+       return pthread_workqueue_setup(&cfg, sizeof(cfg));
 }
 
 int
@@ -2351,7 +2313,17 @@ _pthread_workqueue_init(pthread_workqueue_function2_t func, int offset, int flag
int
pthread_workqueue_setdispatch_np(pthread_workqueue_function_t worker_func)
{
	// Legacy single-callback registration: widen the old-style worker
	// function to the two-argument form and route it through the modern
	// config-struct setup path.  No kevent/workloop callbacks and no
	// dispatch-queue offsets exist in this legacy SPI.
	struct pthread_workqueue_config config = {
		.version = PTHREAD_WORKQUEUE_CONFIG_VERSION,
		.flags = 0,
		.queue_serialno_offs = 0,
		.queue_label_offs = 0,
		.workq_cb = (uint64_t)(pthread_workqueue_function2_t)worker_func,
		.kevent_cb = 0,
		.workloop_cb = 0,
	};

	return pthread_workqueue_setup(&config, sizeof(config));
}
 
 int
@@ -2495,10 +2467,8 @@ _pthread_workloop_destroy(uint64_t workloop_id)
        return res;
 }
 
-
 #pragma mark Introspection SPI for libpthread.
 
-
 static pthread_introspection_hook_t _pthread_introspection_hook;
 
 pthread_introspection_hook_t
@@ -2509,11 +2479,22 @@ pthread_introspection_hook_install(pthread_introspection_hook_t hook)
        return prev;
 }
 
-PTHREAD_NOINLINE
+static inline void
+_pthread_introspection_call_hook(unsigned int event,
+               pthread_t thread, void *addr, size_t size)
+{
+       pthread_t self = pthread_self();
+       uint16_t old = self->introspection;
+       self->introspection = (uint16_t)event;
+       _pthread_introspection_hook(event, thread, addr, size);
+       self->introspection = old;
+}
+
+OS_NOINLINE
 static void
 _pthread_introspection_hook_callout_thread_create(pthread_t t)
 {
-       _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_CREATE, t, t,
+       _pthread_introspection_call_hook(PTHREAD_INTROSPECTION_THREAD_CREATE, t, t,
                        PTHREAD_SIZE);
 }
 
@@ -2524,7 +2505,7 @@ _pthread_introspection_thread_create(pthread_t t)
        _pthread_introspection_hook_callout_thread_create(t);
 }
 
-PTHREAD_NOINLINE
+OS_NOINLINE
 static void
 _pthread_introspection_hook_callout_thread_start(pthread_t t)
 {
@@ -2538,7 +2519,7 @@ _pthread_introspection_hook_callout_thread_start(pthread_t t)
                freesize = t->freesize - PTHREAD_SIZE;
                freeaddr = t->freeaddr;
        }
-       _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_START, t,
+       _pthread_introspection_call_hook(PTHREAD_INTROSPECTION_THREAD_START, t,
                        freeaddr, freesize);
 }
 
@@ -2549,7 +2530,7 @@ _pthread_introspection_thread_start(pthread_t t)
        _pthread_introspection_hook_callout_thread_start(t);
 }
 
-PTHREAD_NOINLINE
+OS_NOINLINE
 static void
 _pthread_introspection_hook_callout_thread_terminate(pthread_t t)
 {
@@ -2563,7 +2544,7 @@ _pthread_introspection_hook_callout_thread_terminate(pthread_t t)
                freesize = t->freesize - PTHREAD_SIZE;
                freeaddr = t->freeaddr;
        }
-       _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_TERMINATE, t,
+       _pthread_introspection_call_hook(PTHREAD_INTROSPECTION_THREAD_TERMINATE, t,
                        freeaddr, freesize);
 }
 
@@ -2574,11 +2555,11 @@ _pthread_introspection_thread_terminate(pthread_t t)
        _pthread_introspection_hook_callout_thread_terminate(t);
 }
 
-PTHREAD_NOINLINE
+OS_NOINLINE
 static void
 _pthread_introspection_hook_callout_thread_destroy(pthread_t t)
 {
-       _pthread_introspection_hook(PTHREAD_INTROSPECTION_THREAD_DESTROY, t, t,
+       _pthread_introspection_call_hook(PTHREAD_INTROSPECTION_THREAD_DESTROY, t, t,
                        PTHREAD_SIZE);
 }
 
@@ -2590,6 +2571,7 @@ _pthread_introspection_thread_destroy(pthread_t t)
 }
 
 #pragma mark libplatform shims
+#if !VARIANT_DYLD
 
 #include <platform/string.h>
 
@@ -2623,3 +2605,4 @@ memcpy(void* a, const void* b, unsigned long s)
        return _platform_memmove(a, b, s);
 }
 
+#endif // !VARIANT_DYLD