.psynch_rw_wrlock = _psynch_rw_wrlock,
        .psynch_rw_yieldwrlock = _psynch_rw_yieldwrlock,
 
+       .pthread_find_owner = _pthread_find_owner,
+       .pthread_get_thread_kwq = _pthread_get_thread_kwq,
+
        .workq_reqthreads = _workq_reqthreads,
        .thread_qos_from_pthread_priority = _thread_qos_from_pthread_priority,
        .pthread_priority_canonicalize2 = _pthread_priority_canonicalize,
 
 #include <kern/thread_call.h>
 #include <sys/pthread_shims.h>
 #include <sys/queue.h>
+#include <kern/kcdata.h>
 #endif
 
 #include "kern/synch_internal.h"
 int _psynch_rw_wrlock(proc_t p, user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval);
 int _psynch_rw_yieldwrlock(proc_t p, user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval);
 
+void _pthread_find_owner(thread_t thread, struct stackshot_thread_waitinfo *waitinfo);
+void * _pthread_get_thread_kwq(thread_t thread);
+
 extern lck_grp_attr_t *pthread_lck_grp_attr;
 extern lck_grp_t *pthread_lck_grp;
 extern lck_attr_t *pthread_lck_attr;
 
                TAILQ_INIT(&wq->wq_thrunlist);
                TAILQ_INIT(&wq->wq_thidlelist);
 
-               wq->wq_atimer_delayed_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq);
-               wq->wq_atimer_immediate_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq);
+               wq->wq_atimer_delayed_call =
+                               thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer,
+                                               (thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL);
+               wq->wq_atimer_immediate_call =
+                               thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer,
+                                               (thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL);
 
                lck_spin_init(&wq->wq_lock, pthread_lck_grp, pthread_lck_attr);
 
                        us_to_wait = wq_reduce_pool_window_usecs / 100;
                }
 
+               thread_set_pending_block_hint(thread, kThreadWaitParkedWorkQueue);
                assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE),
                                TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait,
                                wq_reduce_pool_window_usecs/10, NSEC_PER_USEC);
                workqueue_lock_spin(wq);
 
                if ( !(tl->th_flags & TH_LIST_RUNNING)) {
+                       thread_set_pending_block_hint(th, kThreadWaitParkedWorkQueue);
                        assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE));
 
                        workqueue_unlock(wq);
 
 #include <kern/zalloc.h>
 #include <kern/sched_prim.h>
 #include <kern/processor.h>
+#include <kern/block_hint.h>
 //#include <kern/mach_param.h>
 #include <mach/mach_vm.h>
 #include <mach/mach_param.h>
 
 static int _wait_result_to_errno(wait_result_t result);
 
-static int ksyn_wait(ksyn_wait_queue_t, int, uint32_t, int, uint64_t, thread_continue_t);
+static int ksyn_wait(ksyn_wait_queue_t, int, uint32_t, int, uint64_t, thread_continue_t, block_hint_t);
 static kern_return_t ksyn_signal(ksyn_wait_queue_t, int, ksyn_waitq_element_t, uint32_t);
 static void ksyn_freeallkwe(ksyn_queue_t kq);
 
        ksyn_mtx_update_owner_qos_override(kwq, tid, FALSE);
        kwq->kw_owner = tid;
 
-       error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, mgen, ins_flags, 0, psynch_mtxcontinue);
+       error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, mgen, ins_flags, 0, psynch_mtxcontinue, kThreadWaitPThreadMutex);
        // ksyn_wait drops wait queue lock
 out:
        ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
                        clock_absolutetime_interval_to_deadline(abstime, &abstime);
                }
                
-               error = ksyn_wait(ckwq, KSYN_QUEUE_WRITER, cgen, SEQFIT, abstime, psynch_cvcontinue);
+               error = ksyn_wait(ckwq, KSYN_QUEUE_WRITER, cgen, SEQFIT, abstime, psynch_cvcontinue, kThreadWaitPThreadCondVar);
                // ksyn_wait drops wait queue lock
        }
        
                    _ksyn_handle_prepost(kwq, prepost_type, lockseq, retval)) {
                        ksyn_wqunlock(kwq);
                } else {
-                       error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, THREAD_CONTINUE_NULL);
+                       block_hint_t block_hint = type == PTH_RW_TYPE_READ ?
+                               kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite;
+                       error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, THREAD_CONTINUE_NULL, block_hint);
                        // ksyn_wait drops wait queue lock
                        if (error == 0) {
                                uthread_t uth = current_uthread();
          uint32_t lockseq,
          int fit,
          uint64_t abstime,
-         thread_continue_t continuation)
+         thread_continue_t continuation,
+         block_hint_t block_hint)
 {
        int res;
 
                return res;
        }
        
+       thread_set_pending_block_hint(th, block_hint);
        assert_wait_deadline_with_leeway(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, abstime, 0);
        ksyn_wqunlock(kwq);
        
        kwq_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
        kwe_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
 }
+/*
+ * Return, as an opaque pointer, the ksyn wait queue recorded in `thread's
+ * psynch wait element (kwe->kwe_kwqqueue).
+ *
+ * The lookup goes thread -> BSD uthread -> ksyn_waitq_element via the
+ * pthread_kern callbacks. Each intermediate pointer is only checked with
+ * assert(); the returned queue pointer itself is not checked here, so callers
+ * must be prepared for whatever value the wait element currently holds.
+ * NOTE(review): presumably NULL when the thread is not blocked on a psynch
+ * object -- confirm against the code that sets and clears kwe_kwqqueue.
+ */
+void *
+_pthread_get_thread_kwq(thread_t thread)
+{
+       assert(thread);
+       struct uthread * uthread = pthread_kern->get_bsdthread_info(thread);
+       assert(uthread);
+       ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uthread);
+       assert(kwe);
+       ksyn_wait_queue_t kwq = kwe->kwe_kwqqueue;
+       return kwq;
+}
+
+/* This function is used by stackshot to determine why a thread is blocked, and report
+ * who owns the object that the thread is blocked on. It should *only* be called if the
+ * `block_hint' field in the relevant thread's struct is populated with something related
+ * to pthread sync objects.
+ *
+ * Inputs:  `thread' is the blocked thread; `waitinfo->wait_type' selects which kind of
+ *          pthread object the thread is assumed to be waiting on.
+ * Outputs: `waitinfo->owner' and `waitinfo->context' are always written. `context' is
+ *          the wait queue's kw_addr for mutexes, rwlocks and condvars; `owner' is
+ *          kw_owner for mutexes and 0 for everything else. Any other wait type yields
+ *          owner = 0 and context = 0.
+ *
+ * NOTE(review): the wait queue returned by _pthread_get_thread_kwq() is dereferenced
+ * without a NULL check in every pthread case (both in the asserts and in the field
+ * reads) -- confirm callers guarantee the thread really is parked on a psynch queue.
+ */
+void
+_pthread_find_owner(thread_t thread, struct stackshot_thread_waitinfo * waitinfo)
+{
+       ksyn_wait_queue_t kwq = _pthread_get_thread_kwq(thread);
+       switch (waitinfo->wait_type) {
+               case kThreadWaitPThreadMutex:
+                       assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX);
+                       waitinfo->owner   = kwq->kw_owner; /* value stored in kw_owner by the mutex wait path */
+                       waitinfo->context = kwq->kw_addr;
+                       break;
+               /* Owner of rwlock not stored in kernel space due to races. Punt
+                * and hope that the userspace address is helpful enough. */
+               case kThreadWaitPThreadRWLockRead:
+               case kThreadWaitPThreadRWLockWrite:
+                       assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK);
+                       waitinfo->owner   = 0;
+                       waitinfo->context = kwq->kw_addr;
+                       break;
+               /* Condvars don't have owners, so just give the userspace address. */
+               case kThreadWaitPThreadCondVar:
+                       assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);
+                       waitinfo->owner   = 0;
+                       waitinfo->context = kwq->kw_addr;
+                       break;
+               case kThreadWaitNone:
+               default: /* not a pthread wait type: report nothing rather than stale data */
+                       waitinfo->owner = 0;
+                       waitinfo->context = 0;
+                       break;
+       }
+}
 
 #ifndef _SCHED_H_
 #define _SCHED_H_
 
-#include <pthread_impl.h>
 #include <sys/cdefs.h>
+#include <pthread_impl.h>
 
 __BEGIN_DECLS
 /*
 
 
 // Called before the fork(2) system call is made in the parent process.
 // Iterate pthread_atfork prepare handlers.
+// Called first in libSystem_atfork_prepare().
 void
-_pthread_fork_prepare(void)
+_pthread_atfork_prepare_handlers(void)
 {
        pthread_globals_t globals = _pthread_globals();
 
        _PTHREAD_LOCK(globals->pthread_atfork_lock);
-       
        size_t idx;
        for (idx = globals->atfork_count; idx > 0; --idx) {
                struct pthread_atfork_entry *e = &globals->atfork[idx-1];
                        e->prepare();
                }
        }
+}
+
+// Take pthread-internal locks.
+// Called last in libSystem_atfork_prepare().
+void
+_pthread_fork_prepare(void)
+{
+       pthread_globals_t globals = _pthread_globals();
 
        _PTHREAD_LOCK(globals->psaved_self_global_lock);
        globals->psaved_self = pthread_self();
 }
 
 // Called after the fork(2) system call returns to the parent process.
-// Iterate pthread_atfork parent handlers.
+// Release pthread-internal locks
+// Called first in libSystem_atfork_parent().
 void
 _pthread_fork_parent(void)
 {
 
        _PTHREAD_UNLOCK(globals->psaved_self->lock);
        _PTHREAD_UNLOCK(globals->psaved_self_global_lock);
+}
+
+// Iterate pthread_atfork parent handlers.
+// Called last in libSystem_atfork_parent().
+void
+_pthread_atfork_parent_handlers(void)
+{
+       pthread_globals_t globals = _pthread_globals();
 
        size_t idx;
        for (idx = 0; idx < globals->atfork_count; ++idx) {
 // Called after the fork(2) system call returns to the new child process.
 // Clean up data structures of other threads which no longer exist in the child.
 // Make the current thread the main thread.
+// Called first in libSystem_atfork_child() (after _dyld_fork_child)
 void
 _pthread_fork_child(void)
 {
 }
 
 // Iterate pthread_atfork child handlers.
+// Called last in libSystem_atfork_child().
 void
-_pthread_fork_child_postinit(void)
+_pthread_atfork_child_handlers(void)
 {
        pthread_globals_t globals = _pthread_globals();
        size_t idx;
        }
        _PTHREAD_LOCK_INIT(globals->pthread_atfork_lock);
 }
+
+// Preserve legacy symbol in case somebody depends on it: the old entry point name is kept and simply forwards to _pthread_atfork_child_handlers().
+void
+_pthread_fork_child_postinit(void)
+{
+       _pthread_atfork_child_handlers(); // no work of its own; all behavior lives in the renamed function
+}
 
 trace_codename = function(codename, callback)
        local debugid = trace.debugid(codename)
        if debugid ~= 0 then 
-               trace.single(debugid,callback) 
+               trace.single(debugid,callback)
        else
                printf("WARNING: Cannot locate debugid for '%s'\n", codename)
        end
        if initial_timestamp == 0 then
                initial_timestamp = buf.timestamp
        end
-       local secs = (buf.timestamp - initial_timestamp) / 1000000000
+       local secs = trace.convert_timestamp_to_nanoseconds(buf.timestamp - initial_timestamp) / 1000000000
 
        local prefix
-       if trace.debugid_is_start(buf.debugid) then 
-               prefix = "→" 
-       elseif trace.debugid_is_end(buf.debugid) then 
-               prefix = "←" 
-       else 
-               prefix = "↔" 
+       if trace.debugid_is_start(buf.debugid) then
+               prefix = "→"
+       elseif trace.debugid_is_end(buf.debugid) then
+               prefix = "←"
+       else
+               prefix = "↔"
        end
 
        local proc
        end
 end)
 
+trace.enable_thread_cputime()
+-- Per-thread bookkeeping for the wq_runitem handler below, keyed by thread id.
+-- An entry exists only while a work item is in flight on that thread: it is
+-- written on the start event and removed (set to nil) once the matching end
+-- event has been reported.
+runitem_time_map = {}
+runitem_cputime_map = {}
+-- Report start/end of workqueue items. When the start event for the same
+-- thread was seen, the end event also reports elapsed wall time and CPU time
+-- in milliseconds; otherwise a plain "DONE" line is printed.
 trace_codename("wq_runitem", function(buf)
        local prefix = get_prefix(buf)
        if trace.debugid_is_start(buf.debugid) then
+               runitem_time_map[buf.threadid] = buf.timestamp;
+               runitem_cputime_map[buf.threadid] = trace.cputime_for_thread(buf.threadid);
+
                printf("%s\tSTART running item\n", prefix)
+       elseif runitem_time_map[buf.threadid] then
+               local time = buf.timestamp - runitem_time_map[buf.threadid]
+               local cputime = trace.cputime_for_thread(buf.threadid) - runitem_cputime_map[buf.threadid]
+
+               local time_ms = trace.convert_timestamp_to_nanoseconds(time) / 1000000
+               local cputime_ms = trace.convert_timestamp_to_nanoseconds(cputime) / 1000000
+
+               printf("%s\tDONE running item: time = %6.6f ms, cputime = %6.6f ms\n", prefix, time_ms, cputime_ms)
+
+               -- Clear with nil, not 0: in Lua 0 is truthy, so a 0 entry would
+               -- still satisfy the elseif above and a later end event without a
+               -- matching start would be timed from timestamp 0. nil also
+               -- removes the key so the tables do not grow with every thread
+               -- id ever seen.
+               runitem_time_map[buf.threadid] = nil
+               runitem_cputime_map[buf.threadid] = nil
        else
-               printf("%s\tDONE running item; thread returned to kernel\n", prefix)
+               printf("%s\tDONE running item\n", prefix)
        end
 end)