+/*
+ * Copyright (c) 2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach_types.h>
+#include <mach/vm_param.h>
+#ifdef IMPORTANCE_INHERITANCE
+#include <ipc/ipc_importance.h>
+#endif
+#include <sys/appleapiopts.h>
+#include <kern/debug.h>
+#include <uuid/uuid.h>
+
+#include <kdp/kdp_dyld.h>
+#include <kdp/kdp_en_debugger.h>
+
+#include <libsa/types.h>
+#include <libkern/version.h>
+
+#include <string.h> /* bcopy */
+
+#include <kern/processor.h>
+#include <kern/thread.h>
+#include <kern/clock.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_shared_region.h>
+#include <libkern/OSKextLibPrivate.h>
+
+extern unsigned int not_in_kdp;
+
+/*
+ * TODO: Even hackier than the other pieces. This should really
+ * be moved off of kdp_pmap, and we should probably separate
+ * machine_trace_thread out of the kdp code.
+ */
+extern pmap_t kdp_pmap;
+extern addr64_t kdp_vtophys(pmap_t pmap, addr64_t va);
+
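+/*
+ * Stackshot parameters and results: cached by kdp_snapshot_preflight(),
+ * consumed by do_stackshot(), and reported via the accessors below.
+ */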
+int kdp_snapshot = 0;
+static int stack_snapshot_ret = 0;
+static unsigned stack_snapshot_bytes_traced = 0;
+
+static void *stack_snapshot_buf;
+static uint32_t stack_snapshot_bufsize;
+int stack_snapshot_pid;
+static uint32_t stack_snapshot_flags;
+static uint32_t stack_snapshot_dispatch_offset;
+static unsigned int old_debugger;
+
+void do_stackshot(void);
+void kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size,
+ uint32_t flags, uint32_t dispatch_offset);
+void kdp_snapshot_postflight(void);
+static int kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size,
+ uint32_t flags, uint32_t dispatch_offset, uint32_t *pbytesTraced);
+int kdp_stack_snapshot_geterror(void);
+int kdp_stack_snapshot_bytes_traced(void);
+static int pid_from_task(task_t task);
+static uint64_t proc_uniqueid_from_task(task_t task);
+static void kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
+static boolean_t kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size);
+static uint64_t proc_was_throttled_from_task(task_t task);
+
+extern int proc_pid(void *p);
+extern uint64_t proc_uniqueid(void *p);
+extern uint64_t proc_was_throttled(void *p);
+extern uint64_t proc_did_throttle(void *p);
+static uint64_t proc_did_throttle_from_task(task_t task);
+extern void proc_name_kdp(task_t task, char *buf, int size);
+extern int proc_threadname_kdp(void *uth, char *buf, size_t size);
+extern void proc_starttime_kdp(void *p, uint64_t *tv_sec, uint64_t *tv_usec);
+
+extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */
+extern void bcopy_phys(addr64_t, addr64_t, vm_size_t);
+extern int machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p);
+extern int machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p);
+
+/* Validates that the given address maps to a valid page with default
+ * caching attributes under the current kdp_pmap. Returns 0 if the
+ * address is invalid, or a kernel virtual address for the given
+ * address if it is valid.
+ */
+vm_offset_t machine_trace_thread_get_kva(vm_offset_t cur_target_addr);
+
+/* Clears caching information used by the above validation routine
+ * (in case the kdp_pmap has been changed or cleared).
+ */
+void machine_trace_thread_clear_validation_cache(void);
+
+#define MAX_FRAMES 1000
+
+typedef struct thread_snapshot *thread_snapshot_t;
+typedef struct task_snapshot *task_snapshot_t;
+
+#if CONFIG_KDP_INTERACTIVE_DEBUGGING
+extern kdp_send_t kdp_en_send_pkt;
+#endif
+
+/*
+ * Globals to support machine_trace_thread_get_kva.
+ */
+static vm_offset_t prev_target_page = 0;
+static vm_offset_t prev_target_kva = 0;
+static boolean_t validate_next_addr = TRUE;
+
+
+/*
+ * Method for grabbing timer values safely, in the sense that no infinite loop will occur.
+ * Certain flavors of the timer_grab function, which would seem to be the thing to use,
+ * can loop infinitely if called while the timer is in the process of being updated.
+ * Unfortunately, it is (rarely) possible to get inconsistent top and bottom halves of
+ * the timer using this method. This seems insoluble, since stackshot runs in a context
+ * where the timer might be half-updated, and has no way of yielding control just long
+ * enough to finish the update.
+ */
+
+static uint64_t safe_grab_timer_value(struct timer *t)
+{
+#if defined(__LP64__)
+ return t->all_bits;
+#else
+ uint64_t time = t->high_bits; /* endian independent grab */
+ time = (time << 32) | t->low_bits;
+ return time;
+#endif
+}
+
+/* Cache stack snapshot parameters in preparation for a trace */
+void
+kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset)
+{
+ stack_snapshot_pid = pid;
+ stack_snapshot_buf = tracebuf;
+ stack_snapshot_bufsize = tracebuf_size;
+ stack_snapshot_flags = flags;
+ stack_snapshot_dispatch_offset = dispatch_offset;
+ kdp_snapshot++;
+ /* Mark this debugger as active, since the polled mode driver that
+ * ordinarily does this may not be enabled (yet), or since KDB may be
+ * the primary debugger.
+ */
+ old_debugger = current_debugger;
+ if (old_debugger != KDP_CUR_DB) {
+ current_debugger = KDP_CUR_DB;
+ }
+}
+
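+/* Undo kdp_snapshot_preflight(): drop the snapshot marker and restore the
+ * previously selected debugger when appropriate.
+ */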
+void
+kdp_snapshot_postflight(void)
+{
+ kdp_snapshot--;
+#if CONFIG_KDP_INTERACTIVE_DEBUGGING
+ if ((kdp_en_send_pkt == NULL) || (old_debugger == KDB_CUR_DB))
+ current_debugger = old_debugger;
+#else
+ current_debugger = old_debugger;
+#endif
+}
+
+int
+kdp_stack_snapshot_geterror(void)
+{
+ return stack_snapshot_ret;
+}
+
+int
+kdp_stack_snapshot_bytes_traced(void)
+{
+ return stack_snapshot_bytes_traced;
+}
+
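+/*
+ * Core stackshot walker: iterates the active and terminated task lists,
+ * emitting a task_snapshot for each matching task (all tasks if pid == -1),
+ * followed by optional load info, donating pids, and per-thread snapshots
+ * with kernel/user stack frames. Returns 0 on success, or -1 if the trace
+ * buffer is exhausted.
+ */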
+static int
+kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced)
+{
+ char *tracepos = (char *) tracebuf;
+ char *tracebound = tracepos + tracebuf_size;
+ uint32_t tracebytes = 0;
+ int error = 0, i;
+
+ task_t task = TASK_NULL;
+ thread_t thread = THREAD_NULL;
+ unsigned framesize = 2 * sizeof(vm_offset_t);
+
+ queue_head_t *task_list = &tasks;
+ boolean_t is_active_list = TRUE;
+
+ boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0);
+ boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
+ boolean_t save_kextloadinfo_p = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
+ boolean_t save_userframes_p = ((trace_flags & STACKSHOT_SAVE_KERNEL_FRAMES_ONLY) == 0);
+ boolean_t save_donating_pids_p = ((trace_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0);
+
+ if(trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
+ if(tracepos + sizeof(struct mem_and_io_snapshot) > tracebound) {
+ error = -1;
+ goto error_exit;
+ }
+ kdp_mem_and_io_snapshot((struct mem_and_io_snapshot *)tracepos);
+ tracepos += sizeof(struct mem_and_io_snapshot);
+ }
+
+
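+ /* First pass walks the active task list; the second pass (via the goto at
+ * the bottom) walks terminated_tasks.
+ */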
+walk_list:
+ queue_iterate(task_list, task, task_t, tasks) {
+ if ((task == NULL) || !ml_validate_nofault((vm_offset_t) task, sizeof(struct task)))
+ goto error_exit;
+
+ int task_pid = pid_from_task(task);
+ uint64_t task_uniqueid = proc_uniqueid_from_task(task);
+ boolean_t task64 = task_has_64BitAddr(task);
+
+ if (!task->active) {
+ /*
+ * Not interested in terminated tasks without threads, and
+ * at the moment, stackshot can't handle a task without a name.
+ */
+ if (queue_empty(&task->threads) || task_pid == -1) {
+ continue;
+ }
+ }
+
+ /* Trace everything, unless a process was specified */
+ if ((pid == -1) || (pid == task_pid)) {
+ task_snapshot_t task_snap;
+ thread_snapshot_t tsnap = NULL;
+ uint32_t uuid_info_count = 0;
+ mach_vm_address_t uuid_info_addr = 0;
+ boolean_t have_map = (task->map != NULL) &&
+ (ml_validate_nofault((vm_offset_t)(task->map), sizeof(struct _vm_map)));
+ boolean_t have_pmap = have_map && (task->map->pmap != NULL) &&
+ (ml_validate_nofault((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));
+ uint64_t shared_cache_base_address = 0;
+
+ if (have_pmap && task->active && save_loadinfo_p && task_pid > 0) {
+ // Read the dyld_all_image_infos struct from the task memory to get UUID array count and location
+ if (task64) {
+ struct user64_dyld_all_image_infos task_image_infos;
+ if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct user64_dyld_all_image_infos))) {
+ uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
+ uuid_info_addr = task_image_infos.uuidArray;
+ }
+ } else {
+ struct user32_dyld_all_image_infos task_image_infos;
+ if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct user32_dyld_all_image_infos))) {
+ uuid_info_count = task_image_infos.uuidArrayCount;
+ uuid_info_addr = task_image_infos.uuidArray;
+ }
+ }
+
+ // If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
+ // this data structure), we zero the uuid_info_count so that we won't even try to save load info
+ // for this task.
+ if (!uuid_info_addr) {
+ uuid_info_count = 0;
+ }
+ }
+
+ if (have_pmap && save_kextloadinfo_p && task_pid == 0) {
+ if (ml_validate_nofault((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
+ uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
+ }
+ }
+
+ if (tracepos + sizeof(struct task_snapshot) > tracebound) {
+ error = -1;
+ goto error_exit;
+ }
+
+ task_snap = (task_snapshot_t) tracepos;
+ task_snap->snapshot_magic = STACKSHOT_TASK_SNAPSHOT_MAGIC;
+ task_snap->pid = task_pid;
+ task_snap->uniqueid = task_uniqueid;
+ task_snap->nloadinfos = uuid_info_count;
+ task_snap->donating_pid_count = 0;
+
+ /* Add the BSD process identifiers */
+ if (task_pid != -1)
+ proc_name_kdp(task, task_snap->p_comm, sizeof(task_snap->p_comm));
+ else
+ task_snap->p_comm[0] = '\0';
+ task_snap->ss_flags = 0;
+ if (task64)
+ task_snap->ss_flags |= kUser64_p;
+ if (task64 && task_pid == 0)
+ task_snap->ss_flags |= kKernel64_p;
+ if (!task->active)
+ task_snap->ss_flags |= kTerminatedSnapshot;
+ if(task->pidsuspended) task_snap->ss_flags |= kPidSuspended;
+ if(task->frozen) task_snap->ss_flags |= kFrozen;
+
+ if (task->effective_policy.darwinbg == 1) {
+ task_snap->ss_flags |= kTaskDarwinBG;
+ }
+
+ if (task->requested_policy.t_role == TASK_FOREGROUND_APPLICATION) {
+ task_snap->ss_flags |= kTaskIsForeground;
+ }
+
+ if (task->requested_policy.t_boosted == 1) {
+ task_snap->ss_flags |= kTaskIsBoosted;
+ }
+
+ if (task->effective_policy.t_sup_active == 1)
+ task_snap->ss_flags |= kTaskIsSuppressed;
+#if IMPORTANCE_INHERITANCE
+ if (task->task_imp_base) {
+ if (task->task_imp_base->iit_donor) {
+ task_snap->ss_flags |= kTaskIsImpDonor;
+ }
+
+ if (task->task_imp_base->iit_live_donor) {
+ task_snap->ss_flags |= kTaskIsLiveImpDonor;
+ }
+ }
+#endif
+
+ task_snap->latency_qos = (task->effective_policy.t_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
+ LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.t_latency_qos);
+
+ task_snap->suspend_count = task->suspend_count;
+ task_snap->task_size = have_pmap ? pmap_resident_count(task->map->pmap) : 0;
+ task_snap->faults = task->faults;
+ task_snap->pageins = task->pageins;
+ task_snap->cow_faults = task->cow_faults;
+
+ task_snap->user_time_in_terminated_threads = task->total_user_time;
+ task_snap->system_time_in_terminated_threads = task->total_system_time;
+ /*
+ * The throttling counters are maintained as 64-bit counters in the proc
+ * structure. However, we reserve 32 bits (each) for them in the task_snapshot
+ * struct to save space, since we do not expect them to overflow 32 bits. If
+ * these values do overflow in the future, the fix is simply to widen these
+ * counters to 64 bits in the task_snapshot struct.
+ */
+ task_snap->was_throttled = (uint32_t) proc_was_throttled_from_task(task);
+ task_snap->did_throttle = (uint32_t) proc_did_throttle_from_task(task);
+
+ /* fetch some useful BSD info: */
+ task_snap->p_start_sec = task_snap->p_start_usec = 0;
+ proc_starttime_kdp(task->bsd_info, &task_snap->p_start_sec, &task_snap->p_start_usec);
+ if (task->shared_region && ml_validate_nofault((vm_offset_t)task->shared_region,
+ sizeof(struct vm_shared_region))) {
+ struct vm_shared_region *sr = task->shared_region;
+
+ shared_cache_base_address = sr->sr_base_address + sr->sr_first_mapping;
+ }
+ if (!shared_cache_base_address
+ || !kdp_copyin(task->map->pmap, shared_cache_base_address + offsetof(struct _dyld_cache_header, uuid), task_snap->shared_cache_identifier, sizeof(task_snap->shared_cache_identifier))) {
+ memset(task_snap->shared_cache_identifier, 0x0, sizeof(task_snap->shared_cache_identifier));
+ }
+ if (task->shared_region) {
+ /*
+ * No refcounting here, but we are in debugger
+ * context, so that should be safe.
+ */
+ task_snap->shared_cache_slide = task->shared_region->sr_slide_info.slide;
+ } else {
+ task_snap->shared_cache_slide = 0;
+ }
+
+ /* I/O Statistics */
+ assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
+
+ if (task->task_io_stats) {
+ task_snap->disk_reads_count = task->task_io_stats->disk_reads.count;
+ task_snap->disk_reads_size = task->task_io_stats->disk_reads.size;
+ task_snap->disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
+ task_snap->disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
+ for(i = 0; i < IO_NUM_PRIORITIES; i++) {
+ task_snap->io_priority_count[i] = task->task_io_stats->io_priority[i].count;
+ task_snap->io_priority_size[i] = task->task_io_stats->io_priority[i].size;
+ }
+ task_snap->paging_count = task->task_io_stats->paging.count;
+ task_snap->paging_size = task->task_io_stats->paging.size;
+ task_snap->non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
+ task_snap->non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
+ task_snap->metadata_count = task->task_io_stats->metadata.count;
+ task_snap->metadata_size = task->task_io_stats->metadata.size;
+ task_snap->data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
+ task_snap->data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
+ } else {
+ /* zero from disk_reads_count to end of structure */
+ memset(&task_snap->disk_reads_count, 0, offsetof(struct task_snapshot, metadata_size) - offsetof(struct task_snapshot, disk_reads_count));
+ }
+ tracepos += sizeof(struct task_snapshot);
+
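+ /* Append the task's dyld UUID load-info array immediately after the task snapshot. */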
+ if (task_pid > 0 && uuid_info_count > 0) {
+ uint32_t uuid_info_size = (uint32_t)(task64 ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
+ uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size;
+
+ if (tracepos + uuid_info_array_size > tracebound) {
+ error = -1;
+ goto error_exit;
+ }
+
+ // Copy in the UUID info array
+ // It may be nonresident, in which case just fix up nloadinfos to 0 in the task_snap
+ if (have_pmap && !kdp_copyin(task->map->pmap, uuid_info_addr, tracepos, uuid_info_array_size))
+ task_snap->nloadinfos = 0;
+ else
+ tracepos += uuid_info_array_size;
+ } else if (task_pid == 0 && uuid_info_count > 0) {
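+ /* Kernel task: emit the kernel's own UUID followed by one entry per loaded kext summary. */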
+ uint32_t uuid_info_size = (uint32_t)sizeof(kernel_uuid_info);
+ uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size;
+ kernel_uuid_info *output_uuids;
+
+ if (tracepos + uuid_info_array_size > tracebound) {
+ error = -1;
+ goto error_exit;
+ }
+
+ output_uuids = (kernel_uuid_info *)tracepos;
+
+ do {
+
+ if (!kernel_uuid || !ml_validate_nofault((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
+ /* Kernel UUID not found or inaccessible */
+ task_snap->nloadinfos = 0;
+ break;
+ }
+
+ output_uuids[0].imageLoadAddress = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
+ memcpy(&output_uuids[0].imageUUID, kernel_uuid, sizeof(uuid_t));
+
+ if (ml_validate_nofault((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
+ gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
+ uint32_t kexti;
+
+ for (kexti=0 ; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
+ output_uuids[1+kexti].imageLoadAddress = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
+ memcpy(&output_uuids[1+kexti].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
+ }
+
+ tracepos += uuid_info_array_size;
+ } else {
+ /* kext summary invalid, but kernel UUID was copied */
+ task_snap->nloadinfos = 1;
+ tracepos += uuid_info_size;
+ break;
+ }
+ } while(0);
+ }
+
+ if (save_donating_pids_p) {
+ task_snap->donating_pid_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS, (int *)tracepos, (unsigned int)((tracebound - tracepos)/sizeof(int)));
+ tracepos += sizeof(int) * task_snap->donating_pid_count;
+ }
+
+ queue_iterate(&task->threads, thread, thread_t, task_threads){
+ uint64_t tval;
+
+ if ((thread == NULL) || !ml_validate_nofault((vm_offset_t) thread, sizeof(struct thread)))
+ goto error_exit;
+
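+ /* Require several thread_snapshot sizes of space, leaving headroom for the
+ * frame records (and optional dispatch serial) appended after the snapshot.
+ */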
+ if (((tracepos + 4 * sizeof(struct thread_snapshot)) > tracebound)) {
+ error = -1;
+ goto error_exit;
+ }
+ if (!save_userframes_p && thread->kernel_stack == 0)
+ continue;
+
+ /* Populate the thread snapshot header */
+ tsnap = (thread_snapshot_t) tracepos;
+ tsnap->thread_id = thread_tid(thread);
+ tsnap->state = thread->state;
+ tsnap->priority = thread->priority;
+ tsnap->sched_pri = thread->sched_pri;
+ tsnap->sched_flags = thread->sched_flags;
+ tsnap->wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
+ tsnap->continuation = VM_KERNEL_UNSLIDE(thread->continuation);
+ tval = safe_grab_timer_value(&thread->user_timer);
+ tsnap->user_time = tval;
+ tval = safe_grab_timer_value(&thread->system_timer);
+ if (thread->precise_user_kernel_time) {
+ tsnap->system_time = tval;
+ } else {
+ tsnap->user_time += tval;
+ tsnap->system_time = 0;
+ }
+ tsnap->snapshot_magic = STACKSHOT_THREAD_SNAPSHOT_MAGIC;
+ bzero(&tsnap->pth_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
+ proc_threadname_kdp(thread->uthread, &tsnap->pth_name[0], STACKSHOT_MAX_THREAD_NAME_SIZE);
+ tracepos += sizeof(struct thread_snapshot);
+ tsnap->ss_flags = 0;
+ /* I/O Statistics */
+ assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
+ if (thread->thread_io_stats) {
+ tsnap->disk_reads_count = thread->thread_io_stats->disk_reads.count;
+ tsnap->disk_reads_size = thread->thread_io_stats->disk_reads.size;
+ tsnap->disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
+ tsnap->disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
+ for(i = 0; i < IO_NUM_PRIORITIES; i++) {
+ tsnap->io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
+ tsnap->io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
+ }
+ tsnap->paging_count = thread->thread_io_stats->paging.count;
+ tsnap->paging_size = thread->thread_io_stats->paging.size;
+ tsnap->non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
+ tsnap->non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
+ tsnap->metadata_count = thread->thread_io_stats->metadata.count;
+ tsnap->metadata_size = thread->thread_io_stats->metadata.size;
+ tsnap->data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
+ tsnap->data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
+ } else {
+ /* zero from disk_reads_count to end of structure */
+ memset(&tsnap->disk_reads_count, 0,
+ offsetof(struct thread_snapshot, metadata_size) - offsetof(struct thread_snapshot, disk_reads_count));
+ }
+
+ if (thread->effective_policy.darwinbg) {
+ tsnap->ss_flags |= kThreadDarwinBG;
+ }
+
+ tsnap->io_tier = proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
+ if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
+ tsnap->ss_flags |= kThreadIOPassive;
+ }
+
+ if (thread->suspend_count > 0) {
+ tsnap->ss_flags |= kThreadSuspended;
+ }
+ if (IPC_VOUCHER_NULL != thread->ith_voucher) {
+ tsnap->voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
+ }
+
+ tsnap->ts_qos = thread->effective_policy.thep_qos;
+ tsnap->total_syscalls = thread->syscalls_mach + thread->syscalls_unix;
+
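+ /* For threads of active user tasks, record the dispatch queue serial
+ * number: read the thread's dispatch queue pointer from user memory, then
+ * the serial number at dispatch_offset within the queue, and append it
+ * after the snapshot with kHasDispatchSerial set.
+ */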
+ if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
+ uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
+ if (dqkeyaddr != 0) {
+ uint64_t dqaddr = 0;
+ if (kdp_copyin(task->map->pmap, dqkeyaddr, &dqaddr, (task64 ? 8 : 4)) && (dqaddr != 0)) {
+ uint64_t dqserialnumaddr = dqaddr + dispatch_offset;
+ uint64_t dqserialnum = 0;
+ if (kdp_copyin(task->map->pmap, dqserialnumaddr, &dqserialnum, (task64 ? 8 : 4))) {
+ tsnap->ss_flags |= kHasDispatchSerial;
+ *(uint64_t *)tracepos = dqserialnum;
+ tracepos += 8;
+ }
+ }
+ }
+ }
+ /* Call through to the machine-specific trace routines.
+ * Frames are added past the snapshot header.
+ */
+ tracebytes = 0;
+ if (thread->kernel_stack != 0) {
+#if defined(__LP64__)
+ tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, FALSE);
+ tsnap->ss_flags |= kKernel64_p;
+ framesize = 16;
+#else
+ tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, FALSE);
+ framesize = 8;
+#endif
+ }
+ tsnap->nkern_frames = tracebytes/framesize;
+ tracepos += tracebytes;
+ tracebytes = 0;
+ /* Trace user stack, if any */
+ if (save_userframes_p && task->active && thread->task->map != kernel_map) {
+ /* 64-bit task? */
+ if (task_has_64BitAddr(thread->task)) {
+ tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, TRUE);
+ tsnap->ss_flags |= kUser64_p;
+ framesize = 16;
+ }
+ else {
+ tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, TRUE);
+ framesize = 8;
+ }
+ }
+ tsnap->nuser_frames = tracebytes/framesize;
+ tracepos += tracebytes;
+ tracebytes = 0;
+ }
+
+ if (!save_userframes_p && tsnap == NULL) {
+ /*
+ * No thread info was collected because no thread had kernel frames;
+ * remove the information about this task as well.
+ */
+ tracepos = (char *)task_snap;
+ }
+ }
+ }
+
+ if (is_active_list) {
+ is_active_list = FALSE;
+ task_list = &terminated_tasks;
+ goto walk_list;
+ }
+
+error_exit:
+ /* Release stack snapshot wait indicator */
+ kdp_snapshot_postflight();
+
+ *pbytesTraced = (uint32_t)(tracepos - (char *) tracebuf);
+
+ return error;
+}
+
+static int pid_from_task(task_t task)
+{
+ int pid = -1;
+
+ if (task->bsd_info)
+ pid = proc_pid(task->bsd_info);
+
+ return pid;
+}
+
+static uint64_t
+proc_uniqueid_from_task(task_t task)
+{
+ uint64_t uniqueid = ~(0ULL);
+
+ if (task->bsd_info)
+ uniqueid = proc_uniqueid(task->bsd_info);
+
+ return uniqueid;
+}
+
+static uint64_t
+proc_was_throttled_from_task(task_t task)
+{
+ uint64_t was_throttled = 0;
+
+ if (task->bsd_info)
+ was_throttled = proc_was_throttled(task->bsd_info);
+
+ return was_throttled;
+}
+
+static uint64_t
+proc_did_throttle_from_task(task_t task)
+{
+ uint64_t did_throttle = 0;
+
+ if (task->bsd_info)
+ did_throttle = proc_did_throttle(task->bsd_info);
+
+ return did_throttle;
+}
+
+static void
+kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
+{
+ unsigned int pages_reclaimed;
+ unsigned int pages_wanted;
+ kern_return_t kErr;
+
+ processor_t processor;
+ vm_statistics64_t stat;
+ vm_statistics64_data_t host_vm_stat;
+
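+ /* Seed with the first processor's per-CPU VM stats; the loop below folds
+ * in the remaining processors' compression counters.
+ */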
+ processor = processor_list;
+ stat = &PROCESSOR_DATA(processor, vm_stat);
+ host_vm_stat = *stat;
+
+ if (processor_count > 1) {
+ /*
+ * processor_list may be in the process of changing as we are
+ * attempting a stackshot. Ordinarily it will be lock protected,
+ * but it is not safe to lock in the context of the debugger.
+ * Fortunately we never remove elements from the processor list,
+ * and only add to the end of the list, so we SHOULD be able
+ * to walk it. If we ever want to truly tear down processors,
+ * this will have to change.
+ */
+ while ((processor = processor->processor_list) != NULL) {
+ stat = &PROCESSOR_DATA(processor, vm_stat);
+ host_vm_stat.compressions += stat->compressions;
+ host_vm_stat.decompressions += stat->decompressions;
+ }
+ }
+
+ memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
+ memio_snap->free_pages = vm_page_free_count;
+ memio_snap->active_pages = vm_page_active_count;
+ memio_snap->inactive_pages = vm_page_inactive_count;
+ memio_snap->purgeable_pages = vm_page_purgeable_count;
+ memio_snap->wired_pages = vm_page_wire_count;
+ memio_snap->speculative_pages = vm_page_speculative_count;
+ memio_snap->throttled_pages = vm_page_throttled_count;
+ memio_snap->busy_buffer_count = count_busy_buffers();
+ memio_snap->filebacked_pages = vm_page_pageable_external_count;
+ memio_snap->compressions = (uint32_t)host_vm_stat.compressions;
+ memio_snap->decompressions = (uint32_t)host_vm_stat.decompressions;
+ memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT;
+ kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);
+
+ if ( ! kErr ) {
+ memio_snap->pages_wanted = (uint32_t)pages_wanted;
+ memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
+ memio_snap->pages_wanted_reclaimed_valid = 1;
+ } else {
+ memio_snap->pages_wanted = 0;
+ memio_snap->pages_reclaimed = 0;
+ memio_snap->pages_wanted_reclaimed_valid = 0;
+ }
+}
+
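+/*
+ * Copy 'size' bytes from user address 'uaddr' in pmap 'p' into 'dest', one
+ * physical-page fragment at a time via bcopy_phys(). Stops at the first
+ * unmapped or invalid page; returns TRUE only if the entire range was copied.
+ */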
+static boolean_t
+kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size)
+{
+ size_t rem = size;
+ char *kvaddr = dest;
+
+ while (rem) {
+ ppnum_t upn = pmap_find_phys(p, uaddr);
+ uint64_t phys_src = ptoa_64(upn) | (uaddr & PAGE_MASK);
+ uint64_t phys_dest = kvtophys((vm_offset_t)kvaddr);
+ uint64_t src_rem = PAGE_SIZE - (phys_src & PAGE_MASK);
+ uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK);
+ size_t cur_size = (uint32_t) MIN(src_rem, dst_rem);
+ cur_size = MIN(cur_size, rem);
+
+ if (upn && pmap_valid_page(upn) && phys_dest) {
+ bcopy_phys(phys_src, phys_dest, cur_size);
+ }
+ else
+ break;
+ uaddr += cur_size;
+ kvaddr += cur_size;
+ rem -= cur_size;
+ }
+ return (rem == 0);
+}
+
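+/* Debugger-context entry point: run kdp_stackshot() with the parameters
+ * cached by kdp_snapshot_preflight() and record the result and bytes traced.
+ */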
+void
+do_stackshot()
+{
+ stack_snapshot_ret = kdp_stackshot(stack_snapshot_pid,
+ stack_snapshot_buf, stack_snapshot_bufsize,
+ stack_snapshot_flags, stack_snapshot_dispatch_offset,
+ &stack_snapshot_bytes_traced);
+
+}
+
+/*
+ * A fantastical routine that tries to be fast about returning
+ * translations. Caches the last page we found a translation
+ * for, so that we can be quick about multiple queries to the
+ * same page. It turns out this is exactly the workflow
+ * machine_trace_thread and its relatives tend to throw at us.
+ *
+ * Please zero the nasty global this uses after a bulk lookup;
+ * this isn't safe across a switch of the kdp_pmap or changes
+ * to a pmap.
+ *
+ * This also means that if zero is a valid KVA, we are
+ * screwed. Sucks to be us. Fortunately, this should never
+ * happen.
+ */
+vm_offset_t
+machine_trace_thread_get_kva(vm_offset_t cur_target_addr)
+{
+ unsigned cur_wimg_bits;
+ vm_offset_t cur_target_page;
+ vm_offset_t cur_phys_addr;
+ vm_offset_t kern_virt_target_addr;
+
+ cur_target_page = atop(cur_target_addr);
+
+ if ((cur_target_page != prev_target_page) || validate_next_addr) {
+ /*
+ * Alright; it wasn't our previous page. So
+ * we must validate that there is a page
+ * table entry for this address under the
+ * current kdp_pmap, and that it has default
+ * cache attributes (otherwise it may not be
+ * safe to access it).
+ */
+ cur_phys_addr = kdp_vtophys(kdp_pmap ? kdp_pmap : kernel_pmap, cur_target_addr);
+
+ if (!pmap_valid_page((ppnum_t) atop(cur_phys_addr))) {
+ return 0;
+ }
+
+ cur_wimg_bits = pmap_cache_attributes((ppnum_t) atop(cur_phys_addr));
+
+ if ((cur_wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) {
+ return 0;
+ }
+
+#if __x86_64__
+ kern_virt_target_addr = (vm_offset_t) PHYSMAP_PTOV(cur_phys_addr);
+#else
+#error Oh come on... we should really unify the physical -> kernel virtual interface
+#endif
+ prev_target_page = cur_target_page;
+ prev_target_kva = (kern_virt_target_addr & ~PAGE_MASK);
+ validate_next_addr = FALSE;
+ return kern_virt_target_addr;
+ } else {
+ /* Same page as the previous lookup; reuse the cached translation. */
+ kern_virt_target_addr = prev_target_kva + (cur_target_addr & PAGE_MASK);
+ return kern_virt_target_addr;
+ }
+}
+
+void
+machine_trace_thread_clear_validation_cache(void)
+{
+ validate_next_addr = TRUE;
+}
+