]> git.saurik.com Git - apple/xnu.git/blob - osfmk/kern/kern_stackshot.c
e571487faff1f1dbb696c4f96cfc89ccdd0c90ac
[apple/xnu.git] / osfmk / kern / kern_stackshot.c
1 /*
2 * Copyright (c) 2013-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/vm_param.h>
31 #include <mach/mach_vm.h>
32 #include <mach/clock_types.h>
33 #include <sys/errno.h>
34 #include <sys/stackshot.h>
35 #ifdef IMPORTANCE_INHERITANCE
36 #include <ipc/ipc_importance.h>
37 #endif
38 #include <sys/appleapiopts.h>
39 #include <kern/debug.h>
40 #include <kern/block_hint.h>
41 #include <uuid/uuid.h>
42
43 #include <kdp/kdp_dyld.h>
44 #include <kdp/kdp_en_debugger.h>
45
46 #include <libsa/types.h>
47 #include <libkern/version.h>
48 #include <libkern/section_keywords.h>
49
50 #include <string.h> /* bcopy */
51
52 #include <kern/coalition.h>
53 #include <kern/processor.h>
54 #include <kern/thread.h>
55 #include <kern/thread_group.h>
56 #include <kern/task.h>
57 #include <kern/telemetry.h>
58 #include <kern/clock.h>
59 #include <kern/policy_internal.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_kern.h>
62 #include <vm/vm_pageout.h>
63 #include <vm/vm_fault.h>
64 #include <vm/vm_shared_region.h>
65 #include <libkern/OSKextLibPrivate.h>
66
67 #if defined(__x86_64__)
68 #include <i386/mp.h>
69 #include <i386/cpu_threads.h>
70 #endif
71
72 #if CONFIG_EMBEDDED
73 #include <pexpert/pexpert.h> /* For gPanicBase/gPanicBase */
74 #endif
75
76 #if MONOTONIC
77 #include <kern/monotonic.h>
78 #endif /* MONOTONIC */
79
80 #include <san/kasan.h>
81
82 extern unsigned int not_in_kdp;
83
84
85 /* indicate to the compiler that some accesses are unaligned */
86 typedef uint64_t unaligned_u64 __attribute__((aligned(1)));
87
88 extern addr64_t kdp_vtophys(pmap_t pmap, addr64_t va);
89
90 int kdp_snapshot = 0;
91 static kern_return_t stack_snapshot_ret = 0;
92 static uint32_t stack_snapshot_bytes_traced = 0;
93
94 static kcdata_descriptor_t stackshot_kcdata_p = NULL;
95 static void *stack_snapshot_buf;
96 static uint32_t stack_snapshot_bufsize;
97 int stack_snapshot_pid;
98 static uint32_t stack_snapshot_flags;
99 static uint64_t stack_snapshot_delta_since_timestamp;
100 static boolean_t panic_stackshot;
101
102 static boolean_t stack_enable_faulting = FALSE;
103 static struct stackshot_fault_stats fault_stats;
104
105 static unaligned_u64 * stackshot_duration_outer;
106 static uint64_t stackshot_microsecs;
107
108 void * kernel_stackshot_buf = NULL; /* Pointer to buffer for stackshots triggered from the kernel and retrieved later */
109 int kernel_stackshot_buf_size = 0;
110
111 void * stackshot_snapbuf = NULL; /* Used by stack_snapshot2 (to be removed) */
112
113 __private_extern__ void stackshot_init( void );
114 static boolean_t memory_iszero(void *addr, size_t size);
115 #if CONFIG_TELEMETRY
116 kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
117 #endif
118 uint32_t get_stackshot_estsize(uint32_t prev_size_hint);
119 kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config,
120 size_t stackshot_config_size, boolean_t stackshot_from_user);
121 kern_return_t do_stackshot(void *);
122 void kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t flags, kcdata_descriptor_t data_p, uint64_t since_timestamp);
123 boolean_t stackshot_thread_is_idle_worker_unsafe(thread_t thread);
124 static int kdp_stackshot_kcdata_format(int pid, uint32_t trace_flags, uint32_t *pBytesTraced);
125 uint32_t kdp_stack_snapshot_bytes_traced(void);
126 static void kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
127 static boolean_t kdp_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_result);
128 static int kdp_copyin_string(task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results);
129 static boolean_t kdp_copyin_word(task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results);
130 static uint64_t proc_was_throttled_from_task(task_t task);
131 static void stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_t * waitinfo);
132 static int stackshot_thread_has_valid_waitinfo(thread_t thread);
133 static void stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_t *tsinfo);
134 static int stackshot_thread_has_valid_turnstileinfo(thread_t thread);
135
136 #if CONFIG_COALITIONS
137 static void stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal);
138 static void stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal);
139 #endif /* CONFIG_COALITIONS */
140
141
142 extern uint32_t workqueue_get_pwq_state_kdp(void *proc);
143
144 extern int proc_pid(void *p);
145 extern uint64_t proc_uniqueid(void *p);
146 extern uint64_t proc_was_throttled(void *p);
147 extern uint64_t proc_did_throttle(void *p);
148 extern int proc_exiting(void *p);
149 extern int proc_in_teardown(void *p);
150 static uint64_t proc_did_throttle_from_task(task_t task);
151 extern void proc_name_kdp(task_t task, char * buf, int size);
152 extern int proc_threadname_kdp(void * uth, char * buf, size_t size);
153 extern void proc_starttime_kdp(void * p, uint64_t * tv_sec, uint64_t * tv_usec, uint64_t * abstime);
154 extern boolean_t proc_binary_uuid_kdp(task_t task, uuid_t uuid);
155 extern int memorystatus_get_pressure_status_kdp(void);
156 extern void memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is_dirty_tracked, boolean_t *allow_idle_exit);
157
158 extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */
159 extern void bcopy_phys(addr64_t, addr64_t, vm_size_t);
160
161 #if CONFIG_TELEMETRY
162 extern kern_return_t stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval);
163 #endif /* CONFIG_TELEMETRY */
164
165 extern kern_return_t kern_stack_snapshot_with_reason(char* reason);
166 extern kern_return_t kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user);
167
168 /*
169 * Validates that the given address is both a valid page and has
170 * default caching attributes for the current map. Returns
171 * 0 if the address is invalid, and a kernel virtual address for
172 * the given address if it is valid.
173 */
174 vm_offset_t machine_trace_thread_get_kva(vm_offset_t cur_target_addr, vm_map_t map, uint32_t *thread_trace_flags);
175
176 #define KDP_FAULT_RESULT_PAGED_OUT 0x1 /* some data was unable to be retrieved */
177 #define KDP_FAULT_RESULT_TRIED_FAULT 0x2 /* tried to fault in data */
178 #define KDP_FAULT_RESULT_FAULTED_IN 0x4 /* successfully faulted in data */
179
/*
 * Looks up the physical translation for the given address in the target map, attempting
 * to fault data in if requested and it is not resident. Populates kdp_fault_results
 * (KDP_FAULT_RESULT_* bits) if a non-NULL pointer is supplied.
 */
185 vm_offset_t kdp_find_phys(vm_map_t map, vm_offset_t target_addr, boolean_t try_fault, uint32_t *kdp_fault_results);
186
187 static size_t stackshot_strlcpy(char *dst, const char *src, size_t maxlen);
188 static void stackshot_memcpy(void *dst, const void *src, size_t len);
189
190 /* Clears caching information used by the above validation routine
191 * (in case the current map has been changed or cleared).
192 */
193 void machine_trace_thread_clear_validation_cache(void);
194
195 #define MAX_FRAMES 1000
196 #define MAX_LOADINFOS 500
197 #define TASK_IMP_WALK_LIMIT 20
198
199 typedef struct thread_snapshot *thread_snapshot_t;
200 typedef struct task_snapshot *task_snapshot_t;
201
202 #if CONFIG_KDP_INTERACTIVE_DEBUGGING
203 extern kdp_send_t kdp_en_send_pkt;
204 #endif
205
206 /*
207 * Globals to support machine_trace_thread_get_kva.
208 */
209 static vm_offset_t prev_target_page = 0;
210 static vm_offset_t prev_target_kva = 0;
211 static boolean_t validate_next_addr = TRUE;
212
213 /*
214 * Stackshot locking and other defines.
215 */
216 static lck_grp_t *stackshot_subsys_lck_grp;
217 static lck_grp_attr_t *stackshot_subsys_lck_grp_attr;
218 static lck_attr_t *stackshot_subsys_lck_attr;
219 static lck_mtx_t stackshot_subsys_mutex;
220
221 #define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
222 #define STACKSHOT_SUBSYS_TRY_LOCK() lck_mtx_try_lock(&stackshot_subsys_mutex)
223 #define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
224
225 #define SANE_BOOTPROFILE_TRACEBUF_SIZE (64ULL * 1024ULL * 1024ULL)
226 #define SANE_TRACEBUF_SIZE (8ULL * 1024ULL * 1024ULL)
227
228 #define TRACEBUF_SIZE_PER_GB (1024ULL * 1024ULL)
229
230 SECURITY_READ_ONLY_LATE(static uint32_t) max_tracebuf_size = SANE_TRACEBUF_SIZE;
231
232 /*
233 * We currently set a ceiling of 3 milliseconds spent in the kdp fault path
234 * for non-panic stackshots where faulting is requested.
235 */
236 #define KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS (3 * NSEC_PER_MSEC)
237
238 #define STACKSHOT_SUPP_SIZE (16 * 1024) /* Minimum stackshot size */
239 #define TASK_UUID_AVG_SIZE (16 * sizeof(uuid_t)) /* Average space consumed by UUIDs/task */
240
241 #ifndef ROUNDUP
242 #define ROUNDUP(x, y) ((((x)+(y)-1)/(y))*(y))
243 #endif
244
245 #define STACKSHOT_QUEUE_LABEL_MAXSIZE 64
246
247 /*
248 * Initialize the mutex governing access to the stack snapshot subsystem
249 * and other stackshot related bits.
250 */
251 __private_extern__ void
252 stackshot_init( void )
253 {
254 mach_timebase_info_data_t timebase;
255
256 stackshot_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
257
258 stackshot_subsys_lck_grp = lck_grp_alloc_init("stackshot_subsys_lock", stackshot_subsys_lck_grp_attr);
259
260 stackshot_subsys_lck_attr = lck_attr_alloc_init();
261
262 lck_mtx_init(&stackshot_subsys_mutex, stackshot_subsys_lck_grp, stackshot_subsys_lck_attr);
263
264 clock_timebase_info(&timebase);
265 fault_stats.sfs_system_max_fault_time = ((KDP_FAULT_PATH_MAX_TIME_PER_STACKSHOT_NSECS * timebase.denom) / timebase.numer);
266
267 max_tracebuf_size = MAX(max_tracebuf_size, (ROUNDUP(max_mem, (1024ULL * 1024ULL * 1024ULL)) / TRACEBUF_SIZE_PER_GB));
268
269 PE_parse_boot_argn("stackshot_maxsz", &max_tracebuf_size, sizeof(max_tracebuf_size));
270 }
271
272 /*
273 * Method for grabbing timer values safely, in the sense that no infinite loop will occur
274 * Certain flavors of the timer_grab function, which would seem to be the thing to use,
275 * can loop infinitely if called while the timer is in the process of being updated.
276 * Unfortunately, it is (rarely) possible to get inconsistent top and bottom halves of
277 * the timer using this method. This seems insoluble, since stackshot runs in a context
278 * where the timer might be half-updated, and has no way of yielding control just long
279 * enough to finish the update.
280 */
281
282 static uint64_t
283 safe_grab_timer_value(struct timer *t)
284 {
285 #if defined(__LP64__)
286 return t->all_bits;
287 #else
288 uint64_t time = t->high_bits; /* endian independent grab */
289 time = (time << 32) | t->low_bits;
290 return time;
291 #endif
292 }
293
294 /*
295 * Called with interrupts disabled after stackshot context has been
296 * initialized. Updates stack_snapshot_ret.
297 */
298 static kern_return_t
299 stackshot_trap()
300 {
301 kern_return_t rv;
302
303 #if defined(__x86_64__)
304 /*
305 * Since mp_rendezvous and stackshot both attempt to capture cpus then perform an
306 * operation, it's essential to apply mutual exclusion to the other when one
307 * mechanism is in operation, lest there be a deadlock as the mechanisms race to
308 * capture CPUs.
309 *
310 * Further, we assert that invoking stackshot from mp_rendezvous*() is not
311 * allowed, so we check to ensure there there is no rendezvous in progress before
312 * trying to grab the lock (if there is, a deadlock will occur when we try to
313 * grab the lock). This is accomplished by setting cpu_rendezvous_in_progress to
314 * TRUE in the mp rendezvous action function. If stackshot_trap() is called by
315 * a subordinate of the call chain within the mp rendezvous action, this flag will
316 * be set and can be used to detect the inevitable deadlock that would occur
317 * if this thread tried to grab the rendezvous lock.
318 */
319
320 if (current_cpu_datap()->cpu_rendezvous_in_progress == TRUE) {
321 panic("Calling stackshot from a rendezvous is not allowed!");
322 }
323
324 mp_rendezvous_lock();
325 #endif
326
327 rv = DebuggerTrapWithState(DBOP_STACKSHOT, NULL, NULL, NULL, 0, NULL, FALSE, 0);
328
329 #if defined(__x86_64__)
330 mp_rendezvous_unlock();
331 #endif
332 return rv;
333 }
334
335
336 kern_return_t
337 stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint32_t flags, uint64_t delta_since_timestamp, unsigned *bytes_traced)
338 {
339 kern_return_t error = KERN_SUCCESS;
340 boolean_t istate;
341
342 #if DEVELOPMENT || DEBUG
343 if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
344 error = KERN_NOT_SUPPORTED;
345 goto out;
346 }
347 #endif
348 if ((buf == NULL) || (size <= 0) || (bytes_traced == NULL)) {
349 return KERN_INVALID_ARGUMENT;
350 }
351
352 /* cap in individual stackshot to max_tracebuf_size */
353 if (size > max_tracebuf_size) {
354 size = max_tracebuf_size;
355 }
356
357 /* Serialize tracing */
358 if (flags & STACKSHOT_TRYLOCK) {
359 if (!STACKSHOT_SUBSYS_TRY_LOCK()) {
360 return KERN_LOCK_OWNED;
361 }
362 } else {
363 STACKSHOT_SUBSYS_LOCK();
364 }
365
366 struct kcdata_descriptor kcdata;
367 uint32_t hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ?
368 KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT : KCDATA_BUFFER_BEGIN_STACKSHOT;
369
370 error = kcdata_memory_static_init(&kcdata, (mach_vm_address_t)buf, hdr_tag, size,
371 KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
372 if (error) {
373 goto out;
374 }
375
376 istate = ml_set_interrupts_enabled(FALSE);
377
378 /* Preload trace parameters*/
379 kdp_snapshot_preflight(pid, buf, size, flags, &kcdata, delta_since_timestamp);
380
381 /*
382 * Trap to the debugger to obtain a coherent stack snapshot; this populates
383 * the trace buffer
384 */
385 error = stackshot_trap();
386
387 ml_set_interrupts_enabled(istate);
388
389 *bytes_traced = kdp_stack_snapshot_bytes_traced();
390
391 out:
392 stackshot_kcdata_p = NULL;
393 STACKSHOT_SUBSYS_UNLOCK();
394 return error;
395 }
396
#if CONFIG_TELEMETRY
/*
 * Microstackshot control and retrieval.
 *
 * Control flags (STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE / _DISABLE) toggle
 * telemetry via telemetry_global_ctl() and set *retval to 0; ENABLE takes
 * precedence when both are set. Otherwise the call is a data request:
 * STACKSHOT_GET_MICROSTACKSHOT (checked first) or STACKSHOT_GET_BOOT_PROFILE
 * gathers into tracebuf under the stackshot subsystem lock, and *retval
 * receives the byte count reported by the gather routine.
 *
 * *retval is -1 whenever no control operation ran and no gather was attempted.
 * Returns KERN_INVALID_ARGUMENT for a NULL/empty buffer or one larger than the
 * per-request limit, otherwise the gather routine's result (KERN_SUCCESS when
 * no data flag was set).
 */
kern_return_t
stack_microstackshot(user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, int32_t *retval)
{
	int error = KERN_SUCCESS;
	uint32_t bytes_traced = 0;

	*retval = -1;

	/*
	 * Control related operations: no buffer and no locking required.
	 */
	if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) {
		telemetry_global_ctl(1);
		*retval = 0;
		return error;
	}
	if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) {
		telemetry_global_ctl(0);
		*retval = 0;
		return error;
	}

	/*
	 * Data related operations
	 */
	if ((((void*)tracebuf) == NULL) || (tracebuf_size == 0)) {
		return KERN_INVALID_ARGUMENT;
	}

	STACKSHOT_SUBSYS_LOCK();

	if (flags & STACKSHOT_GET_MICROSTACKSHOT) {
		if (tracebuf_size > max_tracebuf_size) {
			error = KERN_INVALID_ARGUMENT;
		} else {
			bytes_traced = tracebuf_size;
			error = telemetry_gather(tracebuf, &bytes_traced,
			    (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? TRUE : FALSE);
			*retval = (int)bytes_traced;
		}
	} else if (flags & STACKSHOT_GET_BOOT_PROFILE) {
		if (tracebuf_size > SANE_BOOTPROFILE_TRACEBUF_SIZE) {
			error = KERN_INVALID_ARGUMENT;
		} else {
			bytes_traced = tracebuf_size;
			error = bootprofile_gather(tracebuf, &bytes_traced);
			*retval = (int)bytes_traced;
		}
	}

	STACKSHOT_SUBSYS_UNLOCK();
	return error;
}
#endif /* CONFIG_TELEMETRY */
461
462 /*
463 * Return the estimated size of a stackshot based on the
464 * number of currently running threads and tasks.
465 */
466 uint32_t
467 get_stackshot_estsize(uint32_t prev_size_hint)
468 {
469 vm_size_t thread_total;
470 vm_size_t task_total;
471 uint32_t estimated_size;
472
473 thread_total = (threads_count * sizeof(struct thread_snapshot));
474 task_total = (tasks_count * (sizeof(struct task_snapshot) + TASK_UUID_AVG_SIZE));
475
476 estimated_size = (uint32_t) VM_MAP_ROUND_PAGE((thread_total + task_total + STACKSHOT_SUPP_SIZE), PAGE_MASK);
477 if (estimated_size < prev_size_hint) {
478 estimated_size = (uint32_t) VM_MAP_ROUND_PAGE(prev_size_hint, PAGE_MASK);
479 }
480
481 return estimated_size;
482 }
483
484 /*
485 * stackshot_remap_buffer: Utility function to remap bytes_traced bytes starting at stackshotbuf
486 * into the current task's user space and subsequently copy out the address
487 * at which the buffer has been mapped in user space to out_buffer_addr.
488 *
489 * Inputs: stackshotbuf - pointer to the original buffer in the kernel's address space
490 * bytes_traced - length of the buffer to remap starting from stackshotbuf
491 * out_buffer_addr - pointer to placeholder where newly mapped buffer will be mapped.
492 * out_size_addr - pointer to be filled in with the size of the buffer
493 *
494 * Outputs: ENOSPC if there is not enough free space in the task's address space to remap the buffer
495 * EINVAL for all other errors returned by task_remap_buffer/mach_vm_remap
496 * an error from copyout
497 */
498 static kern_return_t
499 stackshot_remap_buffer(void *stackshotbuf, uint32_t bytes_traced, uint64_t out_buffer_addr, uint64_t out_size_addr)
500 {
501 int error = 0;
502 mach_vm_offset_t stackshotbuf_user_addr = (mach_vm_offset_t)NULL;
503 vm_prot_t cur_prot, max_prot;
504
505 error = mach_vm_remap_kernel(get_task_map(current_task()), &stackshotbuf_user_addr, bytes_traced, 0,
506 VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_NONE, kernel_map, (mach_vm_offset_t)stackshotbuf, FALSE, &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
507 /*
508 * If the call to mach_vm_remap fails, we return the appropriate converted error
509 */
510 if (error == KERN_SUCCESS) {
511 /*
512 * If we fail to copy out the address or size of the new buffer, we remove the buffer mapping that
513 * we just made in the task's user space.
514 */
515 error = copyout(CAST_DOWN(void *, &stackshotbuf_user_addr), (user_addr_t)out_buffer_addr, sizeof(stackshotbuf_user_addr));
516 if (error != KERN_SUCCESS) {
517 mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
518 return error;
519 }
520 error = copyout(&bytes_traced, (user_addr_t)out_size_addr, sizeof(bytes_traced));
521 if (error != KERN_SUCCESS) {
522 mach_vm_deallocate(get_task_map(current_task()), stackshotbuf_user_addr, (mach_vm_size_t)bytes_traced);
523 return error;
524 }
525 }
526 return error;
527 }
528
529 kern_return_t
530 kern_stack_snapshot_internal(int stackshot_config_version, void *stackshot_config, size_t stackshot_config_size, boolean_t stackshot_from_user)
531 {
532 int error = 0;
533 boolean_t prev_interrupt_state;
534 uint32_t bytes_traced = 0;
535 uint32_t stackshotbuf_size = 0;
536 void * stackshotbuf = NULL;
537 kcdata_descriptor_t kcdata_p = NULL;
538
539 void * buf_to_free = NULL;
540 int size_to_free = 0;
541
542 /* Parsed arguments */
543 uint64_t out_buffer_addr;
544 uint64_t out_size_addr;
545 int pid = -1;
546 uint32_t flags;
547 uint64_t since_timestamp;
548 uint32_t size_hint = 0;
549
550 if (stackshot_config == NULL) {
551 return KERN_INVALID_ARGUMENT;
552 }
553 #if DEVELOPMENT || DEBUG
554 /* TBD: ask stackshot clients to avoid issuing stackshots in this
555 * configuration in lieu of the kernel feature override.
556 */
557 if (kern_feature_override(KF_STACKSHOT_OVRD) == TRUE) {
558 return KERN_NOT_SUPPORTED;
559 }
560 #endif
561
562 switch (stackshot_config_version) {
563 case STACKSHOT_CONFIG_TYPE:
564 if (stackshot_config_size != sizeof(stackshot_config_t)) {
565 return KERN_INVALID_ARGUMENT;
566 }
567 stackshot_config_t *config = (stackshot_config_t *) stackshot_config;
568 out_buffer_addr = config->sc_out_buffer_addr;
569 out_size_addr = config->sc_out_size_addr;
570 pid = config->sc_pid;
571 flags = config->sc_flags;
572 since_timestamp = config->sc_delta_timestamp;
573 if (config->sc_size <= max_tracebuf_size) {
574 size_hint = config->sc_size;
575 }
576 break;
577 default:
578 return KERN_NOT_SUPPORTED;
579 }
580
581 /*
582 * Currently saving a kernel buffer and trylock are only supported from the
583 * internal/KEXT API.
584 */
585 if (stackshot_from_user) {
586 if (flags & (STACKSHOT_TRYLOCK | STACKSHOT_SAVE_IN_KERNEL_BUFFER | STACKSHOT_FROM_PANIC)) {
587 return KERN_NO_ACCESS;
588 }
589 } else {
590 if (!(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
591 return KERN_NOT_SUPPORTED;
592 }
593 }
594
595 if (!((flags & STACKSHOT_KCDATA_FORMAT) || (flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER))) {
596 return KERN_NOT_SUPPORTED;
597 }
598
599 /*
600 * If we're not saving the buffer in the kernel pointer, we need a place to copy into.
601 */
602 if ((!out_buffer_addr || !out_size_addr) && !(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
603 return KERN_INVALID_ARGUMENT;
604 }
605
606 if (since_timestamp != 0 && ((flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) == 0)) {
607 return KERN_INVALID_ARGUMENT;
608 }
609
610 #if MONOTONIC
611 if (!mt_core_supported) {
612 flags &= ~STACKSHOT_INSTRS_CYCLES;
613 }
614 #else /* MONOTONIC */
615 flags &= ~STACKSHOT_INSTRS_CYCLES;
616 #endif /* !MONOTONIC */
617
618 STACKSHOT_SUBSYS_LOCK();
619
620 if (flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER) {
621 /*
622 * Don't overwrite an existing stackshot
623 */
624 if (kernel_stackshot_buf != NULL) {
625 error = KERN_MEMORY_PRESENT;
626 goto error_exit;
627 }
628 } else if (flags & STACKSHOT_RETRIEVE_EXISTING_BUFFER) {
629 if ((kernel_stackshot_buf == NULL) || (kernel_stackshot_buf_size <= 0)) {
630 error = KERN_NOT_IN_SET;
631 goto error_exit;
632 }
633 error = stackshot_remap_buffer(kernel_stackshot_buf, kernel_stackshot_buf_size,
634 out_buffer_addr, out_size_addr);
635 /*
636 * If we successfully remapped the buffer into the user's address space, we
637 * set buf_to_free and size_to_free so the prior kernel mapping will be removed
638 * and then clear the kernel stackshot pointer and associated size.
639 */
640 if (error == KERN_SUCCESS) {
641 buf_to_free = kernel_stackshot_buf;
642 size_to_free = (int) VM_MAP_ROUND_PAGE(kernel_stackshot_buf_size, PAGE_MASK);
643 kernel_stackshot_buf = NULL;
644 kernel_stackshot_buf_size = 0;
645 }
646
647 goto error_exit;
648 }
649
650 if (flags & STACKSHOT_GET_BOOT_PROFILE) {
651 void *bootprofile = NULL;
652 uint32_t len = 0;
653 #if CONFIG_TELEMETRY
654 bootprofile_get(&bootprofile, &len);
655 #endif
656 if (!bootprofile || !len) {
657 error = KERN_NOT_IN_SET;
658 goto error_exit;
659 }
660 error = stackshot_remap_buffer(bootprofile, len, out_buffer_addr, out_size_addr);
661 goto error_exit;
662 }
663
664 stackshotbuf_size = get_stackshot_estsize(size_hint);
665
666 for (; stackshotbuf_size <= max_tracebuf_size; stackshotbuf_size <<= 1) {
667 if (kmem_alloc(kernel_map, (vm_offset_t *)&stackshotbuf, stackshotbuf_size, VM_KERN_MEMORY_DIAG) != KERN_SUCCESS) {
668 error = KERN_RESOURCE_SHORTAGE;
669 goto error_exit;
670 }
671
672
673 uint32_t hdr_tag = (flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) ? KCDATA_BUFFER_BEGIN_DELTA_STACKSHOT : KCDATA_BUFFER_BEGIN_STACKSHOT;
674 kcdata_p = kcdata_memory_alloc_init((mach_vm_address_t)stackshotbuf, hdr_tag, stackshotbuf_size,
675 KCFLAG_USE_MEMCOPY | KCFLAG_NO_AUTO_ENDBUFFER);
676
677 stackshot_duration_outer = NULL;
678 uint64_t time_start = mach_absolute_time();
679
680 /*
681 * Disable interrupts and save the current interrupt state.
682 */
683 prev_interrupt_state = ml_set_interrupts_enabled(FALSE);
684
685 /*
686 * Load stackshot parameters.
687 */
688 kdp_snapshot_preflight(pid, stackshotbuf, stackshotbuf_size, flags, kcdata_p, since_timestamp);
689
690 error = stackshot_trap();
691
692 ml_set_interrupts_enabled(prev_interrupt_state);
693
694 /* record the duration that interupts were disabled */
695
696 uint64_t time_end = mach_absolute_time();
697 if (stackshot_duration_outer) {
698 *stackshot_duration_outer = time_end - time_start;
699 }
700
701 if (error != KERN_SUCCESS) {
702 if (kcdata_p != NULL) {
703 kcdata_memory_destroy(kcdata_p);
704 kcdata_p = NULL;
705 stackshot_kcdata_p = NULL;
706 }
707 kmem_free(kernel_map, (vm_offset_t)stackshotbuf, stackshotbuf_size);
708 stackshotbuf = NULL;
709 if (error == KERN_INSUFFICIENT_BUFFER_SIZE) {
710 /*
711 * If we didn't allocate a big enough buffer, deallocate and try again.
712 */
713 continue;
714 } else {
715 goto error_exit;
716 }
717 }
718
719 bytes_traced = kdp_stack_snapshot_bytes_traced();
720
721 if (bytes_traced <= 0) {
722 error = KERN_ABORTED;
723 goto error_exit;
724 }
725
726 assert(bytes_traced <= stackshotbuf_size);
727 if (!(flags & STACKSHOT_SAVE_IN_KERNEL_BUFFER)) {
728 error = stackshot_remap_buffer(stackshotbuf, bytes_traced, out_buffer_addr, out_size_addr);
729 goto error_exit;
730 }
731
732 /*
733 * Save the stackshot in the kernel buffer.
734 */
735 kernel_stackshot_buf = stackshotbuf;
736 kernel_stackshot_buf_size = bytes_traced;
737 /*
738 * Figure out if we didn't use all the pages in the buffer. If so, we set buf_to_free to the beginning of
739 * the next page after the end of the stackshot in the buffer so that the kmem_free clips the buffer and
740 * update size_to_free for kmem_free accordingly.
741 */
742 size_to_free = stackshotbuf_size - (int) VM_MAP_ROUND_PAGE(bytes_traced, PAGE_MASK);
743
744 assert(size_to_free >= 0);
745
746 if (size_to_free != 0) {
747 buf_to_free = (void *)((uint64_t)stackshotbuf + stackshotbuf_size - size_to_free);
748 }
749
750 stackshotbuf = NULL;
751 stackshotbuf_size = 0;
752 goto error_exit;
753 }
754
755 if (stackshotbuf_size > max_tracebuf_size) {
756 error = KERN_RESOURCE_SHORTAGE;
757 }
758
759 error_exit:
760 if (kcdata_p != NULL) {
761 kcdata_memory_destroy(kcdata_p);
762 kcdata_p = NULL;
763 stackshot_kcdata_p = NULL;
764 }
765
766 if (stackshotbuf != NULL) {
767 kmem_free(kernel_map, (vm_offset_t)stackshotbuf, stackshotbuf_size);
768 }
769 if (buf_to_free != NULL) {
770 kmem_free(kernel_map, (vm_offset_t)buf_to_free, size_to_free);
771 }
772 STACKSHOT_SUBSYS_UNLOCK();
773 return error;
774 }
775
776 /*
777 * Cache stack snapshot parameters in preparation for a trace.
778 */
779 void
780 kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t flags,
781 kcdata_descriptor_t data_p, uint64_t since_timestamp)
782 {
783 uint64_t microsecs = 0, secs = 0;
784 clock_get_calendar_microtime((clock_sec_t *)&secs, (clock_usec_t *)&microsecs);
785
786 stackshot_microsecs = microsecs + (secs * USEC_PER_SEC);
787 stack_snapshot_pid = pid;
788 stack_snapshot_buf = tracebuf;
789 stack_snapshot_bufsize = tracebuf_size;
790 stack_snapshot_flags = flags;
791 stack_snapshot_delta_since_timestamp = since_timestamp;
792
793 panic_stackshot = ((flags & STACKSHOT_FROM_PANIC) != 0);
794
795 assert(data_p != NULL);
796 assert(stackshot_kcdata_p == NULL);
797 stackshot_kcdata_p = data_p;
798
799 stack_snapshot_bytes_traced = 0;
800 }
801
802 void
803 panic_stackshot_reset_state()
804 {
805 stackshot_kcdata_p = NULL;
806 }
807
808 boolean_t
809 stackshot_active()
810 {
811 return stackshot_kcdata_p != NULL;
812 }
813
814 uint32_t
815 kdp_stack_snapshot_bytes_traced(void)
816 {
817 return stack_snapshot_bytes_traced;
818 }
819
820 static boolean_t
821 memory_iszero(void *addr, size_t size)
822 {
823 char *data = (char *)addr;
824 for (size_t i = 0; i < size; i++) {
825 if (data[i] != 0) {
826 return FALSE;
827 }
828 }
829 return TRUE;
830 }
831
/* Buffer start plus the number of bytes kcdata reports as used. */
#define kcd_end_address(kcd) \
	((void *)((uint64_t)((kcd)->kcd_addr_begin) + kcdata_memory_get_used_bytes((kcd))))
/* Buffer start plus the total buffer length. */
#define kcd_max_address(kcd) \
	((void *)((kcd)->kcd_addr_begin + (kcd)->kcd_length))
/*
 * kcd_exit_on_error(action) evaluates action once, stores the result in a
 * local 'kern_return_t error' and, on anything other than KERN_SUCCESS,
 * jumps to the local 'error_exit' label. KERN_RESOURCE_SHORTAGE is reported
 * as KERN_INSUFFICIENT_BUFFER_SIZE. Both the variable and the label must
 * exist in the calling function.
 *
 * NOTE(review): the expansion ends in its own ';', so it is not safe as the
 * sole body of an unbraced if/else. Left as is because callers may rely on it.
 */
#define kcd_exit_on_error(action)                              \
	do {                                                   \
	        error = (action);                              \
	        if (error != KERN_SUCCESS) {                   \
	                if (error == KERN_RESOURCE_SHORTAGE) { \
	                        error = KERN_INSUFFICIENT_BUFFER_SIZE; \
	                }                                      \
	                goto error_exit;                       \
	        }                                              \
	} while (0); /* end kcd_exit_on_error */
847
848 static uint64_t
849 kcdata_get_task_ss_flags(task_t task)
850 {
851 uint64_t ss_flags = 0;
852 boolean_t task_64bit_addr = task_has_64Bit_addr(task);
853
854 if (task_64bit_addr) {
855 ss_flags |= kUser64_p;
856 }
857 if (!task->active || task_is_a_corpse(task) || proc_exiting(task->bsd_info)) {
858 ss_flags |= kTerminatedSnapshot;
859 }
860 if (task->pidsuspended) {
861 ss_flags |= kPidSuspended;
862 }
863 if (task->frozen) {
864 ss_flags |= kFrozen;
865 }
866 if (task->effective_policy.tep_darwinbg == 1) {
867 ss_flags |= kTaskDarwinBG;
868 }
869 if (task->requested_policy.trp_role == TASK_FOREGROUND_APPLICATION) {
870 ss_flags |= kTaskIsForeground;
871 }
872 if (task->requested_policy.trp_boosted == 1) {
873 ss_flags |= kTaskIsBoosted;
874 }
875 if (task->effective_policy.tep_sup_active == 1) {
876 ss_flags |= kTaskIsSuppressed;
877 }
878 #if CONFIG_MEMORYSTATUS
879
880 boolean_t dirty = FALSE, dirty_tracked = FALSE, allow_idle_exit = FALSE;
881 memorystatus_proc_flags_unsafe(task->bsd_info, &dirty, &dirty_tracked, &allow_idle_exit);
882 if (dirty) {
883 ss_flags |= kTaskIsDirty;
884 }
885 if (dirty_tracked) {
886 ss_flags |= kTaskIsDirtyTracked;
887 }
888 if (allow_idle_exit) {
889 ss_flags |= kTaskAllowIdleExit;
890 }
891
892 #endif
893 if (task->effective_policy.tep_tal_engaged) {
894 ss_flags |= kTaskTALEngaged;
895 }
896
897 ss_flags |= (0x7 & workqueue_get_pwq_state_kdp(task->bsd_info)) << 17;
898
899 #if IMPORTANCE_INHERITANCE
900 if (task->task_imp_base) {
901 if (task->task_imp_base->iit_donor) {
902 ss_flags |= kTaskIsImpDonor;
903 }
904 if (task->task_imp_base->iit_live_donor) {
905 ss_flags |= kTaskIsLiveImpDonor;
906 }
907 }
908 #endif
909 return ss_flags;
910 }
911
912 static kern_return_t
913 kcdata_record_shared_cache_info(kcdata_descriptor_t kcd, task_t task, unaligned_u64 *task_snap_ss_flags)
914 {
915 kern_return_t error = KERN_SUCCESS;
916 mach_vm_address_t out_addr = 0;
917
918 uint64_t shared_cache_slide = 0;
919 uint64_t shared_cache_base_address = 0;
920 uint32_t kdp_fault_results = 0;
921
922 assert(task_snap_ss_flags != NULL);
923
924 if (task->shared_region && ml_validate_nofault((vm_offset_t)task->shared_region, sizeof(struct vm_shared_region))) {
925 struct vm_shared_region *sr = task->shared_region;
926 shared_cache_base_address = sr->sr_base_address + sr->sr_first_mapping;
927 } else {
928 *task_snap_ss_flags |= kTaskSharedRegionInfoUnavailable;
929 goto error_exit;
930 }
931
932 /* We haven't copied in the shared region UUID yet as part of setup */
933 if (!shared_cache_base_address || !task->shared_region->sr_uuid_copied) {
934 goto error_exit;
935 }
936
937 /*
938 * No refcounting here, but we are in debugger
939 * context, so that should be safe.
940 */
941 shared_cache_slide = task->shared_region->sr_slide_info.slide;
942
943 if (task->shared_region == init_task_shared_region) {
944 /* skip adding shared cache info -- it's the same as the system level one */
945 goto error_exit;
946 }
947
948 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64_v2), &out_addr));
949 struct dyld_uuid_info_64_v2 *shared_cache_data = (struct dyld_uuid_info_64_v2 *)out_addr;
950 shared_cache_data->imageLoadAddress = shared_cache_slide;
951 stackshot_memcpy(shared_cache_data->imageUUID, task->shared_region->sr_uuid, sizeof(task->shared_region->sr_uuid));
952 shared_cache_data->imageSlidBaseAddress = shared_cache_base_address;
953
954 error_exit:
955 if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
956 *task_snap_ss_flags |= kTaskUUIDInfoMissing;
957 }
958
959 if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
960 *task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
961 }
962
963 if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
964 *task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
965 }
966
967 return error;
968 }
969
970 static kern_return_t
971 kcdata_record_uuid_info(kcdata_descriptor_t kcd, task_t task, uint32_t trace_flags, boolean_t have_pmap, unaligned_u64 *task_snap_ss_flags)
972 {
973 boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
974 boolean_t save_kextloadinfo_p = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
975 boolean_t should_fault = (trace_flags & STACKSHOT_ENABLE_UUID_FAULTING);
976
977 kern_return_t error = KERN_SUCCESS;
978 mach_vm_address_t out_addr = 0;
979
980 uint32_t uuid_info_count = 0;
981 mach_vm_address_t uuid_info_addr = 0;
982 uint64_t uuid_info_timestamp = 0;
983 uint32_t kdp_fault_results = 0;
984
985 assert(task_snap_ss_flags != NULL);
986
987 int task_pid = pid_from_task(task);
988 boolean_t task_64bit_addr = task_has_64Bit_addr(task);
989
990 if (save_loadinfo_p && have_pmap && task->active && task_pid > 0) {
991 /* Read the dyld_all_image_infos struct from the task memory to get UUID array count and location */
992 if (task_64bit_addr) {
993 struct user64_dyld_all_image_infos task_image_infos;
994 if (kdp_copyin(task->map, task->all_image_info_addr, &task_image_infos,
995 sizeof(struct user64_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
996 uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
997 uuid_info_addr = task_image_infos.uuidArray;
998 if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
999 uuid_info_timestamp = task_image_infos.timestamp;
1000 }
1001 }
1002 } else {
1003 struct user32_dyld_all_image_infos task_image_infos;
1004 if (kdp_copyin(task->map, task->all_image_info_addr, &task_image_infos,
1005 sizeof(struct user32_dyld_all_image_infos), should_fault, &kdp_fault_results)) {
1006 uuid_info_count = task_image_infos.uuidArrayCount;
1007 uuid_info_addr = task_image_infos.uuidArray;
1008 if (task_image_infos.version >= DYLD_ALL_IMAGE_INFOS_TIMESTAMP_MINIMUM_VERSION) {
1009 uuid_info_timestamp = task_image_infos.timestamp;
1010 }
1011 }
1012 }
1013
1014 /*
1015 * If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
1016 * this data structure), we zero the uuid_info_count so that we won't even try to save load info
1017 * for this task.
1018 */
1019 if (!uuid_info_addr) {
1020 uuid_info_count = 0;
1021 }
1022 }
1023
1024 if (have_pmap && task_pid == 0) {
1025 if (save_kextloadinfo_p && ml_validate_nofault((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
1026 uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
1027 } else {
1028 uuid_info_count = 1; /* include kernelcache UUID (embedded) or kernel UUID (desktop) */
1029 }
1030 }
1031
1032 if (save_loadinfo_p && task_pid > 0 && (uuid_info_count < MAX_LOADINFOS)) {
1033 uint32_t copied_uuid_count = 0;
1034 uint32_t uuid_info_size = (uint32_t)(task_64bit_addr ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
1035 uint32_t uuid_info_array_size = 0;
1036
1037 /* If we found some UUID information, first try to copy it in -- this will only be non-zero if we had a pmap above */
1038 if (uuid_info_count > 0) {
1039 uuid_info_array_size = uuid_info_count * uuid_info_size;
1040
1041 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
1042 uuid_info_size, uuid_info_count, &out_addr));
1043
1044 if (!kdp_copyin(task->map, uuid_info_addr, (void *)out_addr, uuid_info_array_size, should_fault, &kdp_fault_results)) {
1045 bzero((void *)out_addr, uuid_info_array_size);
1046 } else {
1047 copied_uuid_count = uuid_info_count;
1048 }
1049 }
1050
1051 uuid_t binary_uuid;
1052 if (!copied_uuid_count && proc_binary_uuid_kdp(task, binary_uuid)) {
1053 /* We failed to copyin the UUID information, try to store the UUID of the main binary we have in the proc */
1054 if (uuid_info_array_size == 0) {
1055 /* We just need to store one UUID */
1056 uuid_info_array_size = uuid_info_size;
1057 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, (task_64bit_addr ? KCDATA_TYPE_LIBRARY_LOADINFO64 : KCDATA_TYPE_LIBRARY_LOADINFO),
1058 uuid_info_size, 1, &out_addr));
1059 }
1060
1061 if (task_64bit_addr) {
1062 struct user64_dyld_uuid_info *uuid_info = (struct user64_dyld_uuid_info *)out_addr;
1063 uint64_t image_load_address = task->mach_header_vm_address;
1064
1065 stackshot_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
1066 stackshot_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
1067 } else {
1068 struct user32_dyld_uuid_info *uuid_info = (struct user32_dyld_uuid_info *)out_addr;
1069 uint32_t image_load_address = (uint32_t) task->mach_header_vm_address;
1070
1071 stackshot_memcpy(&uuid_info->imageUUID, binary_uuid, sizeof(uuid_t));
1072 stackshot_memcpy(&uuid_info->imageLoadAddress, &image_load_address, sizeof(image_load_address));
1073 }
1074 }
1075 } else if (task_pid == 0 && uuid_info_count > 0 && uuid_info_count < MAX_LOADINFOS) {
1076 uintptr_t image_load_address;
1077
1078 do {
1079 #if CONFIG_EMBEDDED
1080 if (kernelcache_uuid_valid && !save_kextloadinfo_p) {
1081 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_KERNELCACHE_LOADINFO, sizeof(struct dyld_uuid_info_64), &out_addr));
1082 struct dyld_uuid_info_64 *kc_uuid = (struct dyld_uuid_info_64 *)out_addr;
1083 kc_uuid->imageLoadAddress = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
1084 stackshot_memcpy(&kc_uuid->imageUUID, &kernelcache_uuid, sizeof(uuid_t));
1085 break;
1086 }
1087 #endif /* CONFIG_EMBEDDED */
1088
1089 if (!kernel_uuid || !ml_validate_nofault((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
1090 /* Kernel UUID not found or inaccessible */
1091 break;
1092 }
1093
1094 kcd_exit_on_error(kcdata_get_memory_addr_for_array(
1095 kcd, (sizeof(kernel_uuid_info) == sizeof(struct user64_dyld_uuid_info)) ? KCDATA_TYPE_LIBRARY_LOADINFO64
1096 : KCDATA_TYPE_LIBRARY_LOADINFO,
1097 sizeof(kernel_uuid_info), uuid_info_count, &out_addr));
1098 kernel_uuid_info *uuid_info_array = (kernel_uuid_info *)out_addr;
1099 image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
1100 uuid_info_array[0].imageLoadAddress = image_load_address;
1101 stackshot_memcpy(&uuid_info_array[0].imageUUID, kernel_uuid, sizeof(uuid_t));
1102
1103 if (save_kextloadinfo_p &&
1104 ml_validate_nofault((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader)) &&
1105 ml_validate_nofault((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
1106 gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
1107 uint32_t kexti;
1108 for (kexti = 0; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
1109 image_load_address = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
1110 uuid_info_array[kexti + 1].imageLoadAddress = image_load_address;
1111 stackshot_memcpy(&uuid_info_array[kexti + 1].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
1112 }
1113 }
1114 } while (0);
1115 }
1116
1117 error_exit:
1118 if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
1119 *task_snap_ss_flags |= kTaskUUIDInfoMissing;
1120 }
1121
1122 if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
1123 *task_snap_ss_flags |= kTaskUUIDInfoTriedFault;
1124 }
1125
1126 if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
1127 *task_snap_ss_flags |= kTaskUUIDInfoFaultedIn;
1128 }
1129
1130 return error;
1131 }
1132
1133 static kern_return_t
1134 kcdata_record_task_iostats(kcdata_descriptor_t kcd, task_t task)
1135 {
1136 kern_return_t error = KERN_SUCCESS;
1137 mach_vm_address_t out_addr = 0;
1138
1139 /* I/O Statistics if any counters are non zero */
1140 assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
1141 if (task->task_io_stats && !memory_iszero(task->task_io_stats, sizeof(struct io_stat_info))) {
1142 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
1143 struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
1144 _iostat->ss_disk_reads_count = task->task_io_stats->disk_reads.count;
1145 _iostat->ss_disk_reads_size = task->task_io_stats->disk_reads.size;
1146 _iostat->ss_disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
1147 _iostat->ss_disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
1148 _iostat->ss_paging_count = task->task_io_stats->paging.count;
1149 _iostat->ss_paging_size = task->task_io_stats->paging.size;
1150 _iostat->ss_non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
1151 _iostat->ss_non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
1152 _iostat->ss_metadata_count = task->task_io_stats->metadata.count;
1153 _iostat->ss_metadata_size = task->task_io_stats->metadata.size;
1154 _iostat->ss_data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
1155 _iostat->ss_data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
1156 for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
1157 _iostat->ss_io_priority_count[i] = task->task_io_stats->io_priority[i].count;
1158 _iostat->ss_io_priority_size[i] = task->task_io_stats->io_priority[i].size;
1159 }
1160 }
1161
1162 error_exit:
1163 return error;
1164 }
1165
1166 #if MONOTONIC
1167 static kern_return_t
1168 kcdata_record_task_instrs_cycles(kcdata_descriptor_t kcd, task_t task)
1169 {
1170 uint64_t instrs = 0, cycles = 0;
1171 mt_stackshot_task(task, &instrs, &cycles);
1172
1173 kern_return_t error = KERN_SUCCESS;
1174 mach_vm_address_t out_addr = 0;
1175 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(struct instrs_cycles_snapshot), &out_addr));
1176 struct instrs_cycles_snapshot *instrs_cycles = (struct instrs_cycles_snapshot *)out_addr;
1177 instrs_cycles->ics_instructions = instrs;
1178 instrs_cycles->ics_cycles = cycles;
1179
1180 error_exit:
1181 return error;
1182 }
1183 #endif /* MONOTONIC */
1184
1185 static kern_return_t
1186 kcdata_record_task_snapshot(kcdata_descriptor_t kcd, task_t task, uint32_t trace_flags, boolean_t have_pmap, unaligned_u64 **task_snap_ss_flags)
1187 {
1188 boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
1189 boolean_t collect_iostats = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
1190 #if MONOTONIC
1191 boolean_t collect_instrs_cycles = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
1192 #endif /* MONOTONIC */
1193 #if __arm__ || __arm64__
1194 boolean_t collect_asid = ((trace_flags & STACKSHOT_ASID) != 0);
1195 #endif
1196 boolean_t collect_pagetables = ((trace_flags & STACKSHOT_PAGE_TABLES) != 0);
1197
1198
1199 kern_return_t error = KERN_SUCCESS;
1200 mach_vm_address_t out_addr = 0;
1201 struct task_snapshot_v2 * cur_tsnap = NULL;
1202
1203 assert(task_snap_ss_flags != NULL);
1204
1205 int task_pid = pid_from_task(task);
1206 uint64_t task_uniqueid = get_task_uniqueid(task);
1207 uint64_t proc_starttime_secs = 0;
1208
1209 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_SNAPSHOT, sizeof(struct task_snapshot_v2), &out_addr));
1210 cur_tsnap = (struct task_snapshot_v2 *)out_addr;
1211 bzero(cur_tsnap, sizeof(*cur_tsnap));
1212
1213 cur_tsnap->ts_unique_pid = task_uniqueid;
1214 cur_tsnap->ts_ss_flags = kcdata_get_task_ss_flags(task);
1215 *task_snap_ss_flags = (unaligned_u64 *)&cur_tsnap->ts_ss_flags;
1216 cur_tsnap->ts_user_time_in_terminated_threads = task->total_user_time;
1217 cur_tsnap->ts_system_time_in_terminated_threads = task->total_system_time;
1218
1219 proc_starttime_kdp(task->bsd_info, &proc_starttime_secs, NULL, NULL);
1220 cur_tsnap->ts_p_start_sec = proc_starttime_secs;
1221 cur_tsnap->ts_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
1222 cur_tsnap->ts_max_resident_size = get_task_resident_max(task);
1223 cur_tsnap->ts_was_throttled = (uint32_t) proc_was_throttled_from_task(task);
1224 cur_tsnap->ts_did_throttle = (uint32_t) proc_did_throttle_from_task(task);
1225
1226 cur_tsnap->ts_suspend_count = task->suspend_count;
1227 cur_tsnap->ts_faults = task->faults;
1228 cur_tsnap->ts_pageins = task->pageins;
1229 cur_tsnap->ts_cow_faults = task->cow_faults;
1230 cur_tsnap->ts_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
1231 LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
1232 cur_tsnap->ts_pid = task_pid;
1233
1234 #if __arm__ || __arm64__
1235 if (collect_asid && have_pmap) {
1236 uint32_t asid = PMAP_VASID(task->map->pmap);
1237 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_ASID, sizeof(uint32_t), &out_addr));
1238 stackshot_memcpy((void*)out_addr, &asid, sizeof(asid));
1239 }
1240 #endif
1241 if (collect_pagetables && have_pmap) {
1242 #if INTERRUPT_MASKED_DEBUG
1243 // pagetable dumps can be large; reset the interrupt timeout to avoid a panic
1244 ml_spin_debug_clear_self();
1245 #endif
1246 size_t bytes_dumped = pmap_dump_page_tables(task->map->pmap, kcd_end_address(kcd), kcd_max_address(kcd));
1247 if (bytes_dumped == 0) {
1248 error = KERN_INSUFFICIENT_BUFFER_SIZE;
1249 goto error_exit;
1250 } else if (bytes_dumped == (size_t)-1) {
1251 error = KERN_NOT_SUPPORTED;
1252 goto error_exit;
1253 } else {
1254 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, STACKSHOT_KCTYPE_PAGE_TABLES,
1255 sizeof(uint64_t), (uint32_t)(bytes_dumped / sizeof(uint64_t)), &out_addr));
1256 }
1257 }
1258
1259 /* Add the BSD process identifiers */
1260 if (task_pid != -1 && task->bsd_info != NULL) {
1261 proc_name_kdp(task, cur_tsnap->ts_p_comm, sizeof(cur_tsnap->ts_p_comm));
1262 #if CONFIG_COALITIONS
1263 if ((trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) && (task->coalition[COALITION_TYPE_JETSAM] != NULL)) {
1264 uint64_t jetsam_coal_id = coalition_id(task->coalition[COALITION_TYPE_JETSAM]);
1265 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_JETSAM_COALITION, sizeof(jetsam_coal_id), &out_addr));
1266 stackshot_memcpy((void*)out_addr, &jetsam_coal_id, sizeof(jetsam_coal_id));
1267 }
1268 #endif /* CONFIG_COALITIONS */
1269 } else {
1270 cur_tsnap->ts_p_comm[0] = '\0';
1271 #if IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG)
1272 if (task->task_imp_base != NULL) {
1273 stackshot_strlcpy(cur_tsnap->ts_p_comm, &task->task_imp_base->iit_procname[0],
1274 MIN((int)sizeof(task->task_imp_base->iit_procname), (int)sizeof(cur_tsnap->ts_p_comm)));
1275 }
1276 #endif /* IMPORTANCE_INHERITANCE && (DEVELOPMENT || DEBUG) */
1277 }
1278
1279 if (collect_iostats) {
1280 kcd_exit_on_error(kcdata_record_task_iostats(kcd, task));
1281 }
1282
1283 #if MONOTONIC
1284 if (collect_instrs_cycles) {
1285 kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
1286 }
1287 #endif /* MONOTONIC */
1288
1289 error_exit:
1290 return error;
1291 }
1292
1293 static kern_return_t
1294 kcdata_record_task_delta_snapshot(kcdata_descriptor_t kcd, task_t task, uint32_t trace_flags, boolean_t have_pmap, unaligned_u64 **task_snap_ss_flags)
1295 {
1296 #if !MONOTONIC
1297 #pragma unused(trace_flags)
1298 #endif /* !MONOTONIC */
1299 kern_return_t error = KERN_SUCCESS;
1300 struct task_delta_snapshot_v2 * cur_tsnap = NULL;
1301 mach_vm_address_t out_addr = 0;
1302 (void) trace_flags;
1303 #if __arm__ || __arm64__
1304 boolean_t collect_asid = ((trace_flags & STACKSHOT_ASID) != 0);
1305 #endif
1306 #if MONOTONIC
1307 boolean_t collect_instrs_cycles = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
1308 #endif /* MONOTONIC */
1309
1310 uint64_t task_uniqueid = get_task_uniqueid(task);
1311 assert(task_snap_ss_flags != NULL);
1312
1313 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_TASK_DELTA_SNAPSHOT, sizeof(struct task_delta_snapshot_v2), &out_addr));
1314
1315 cur_tsnap = (struct task_delta_snapshot_v2 *)out_addr;
1316
1317 cur_tsnap->tds_unique_pid = task_uniqueid;
1318 cur_tsnap->tds_ss_flags = kcdata_get_task_ss_flags(task);
1319 *task_snap_ss_flags = (unaligned_u64 *)&cur_tsnap->tds_ss_flags;
1320
1321 cur_tsnap->tds_user_time_in_terminated_threads = task->total_user_time;
1322 cur_tsnap->tds_system_time_in_terminated_threads = task->total_system_time;
1323
1324 cur_tsnap->tds_task_size = have_pmap ? get_task_phys_footprint(task) : 0;
1325
1326 cur_tsnap->tds_max_resident_size = get_task_resident_max(task);
1327 cur_tsnap->tds_suspend_count = task->suspend_count;
1328 cur_tsnap->tds_faults = task->faults;
1329 cur_tsnap->tds_pageins = task->pageins;
1330 cur_tsnap->tds_cow_faults = task->cow_faults;
1331 cur_tsnap->tds_was_throttled = (uint32_t)proc_was_throttled_from_task(task);
1332 cur_tsnap->tds_did_throttle = (uint32_t)proc_did_throttle_from_task(task);
1333 cur_tsnap->tds_latency_qos = (task->effective_policy.tep_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED)
1334 ? LATENCY_QOS_TIER_UNSPECIFIED
1335 : ((0xFF << 16) | task->effective_policy.tep_latency_qos);
1336
1337 #if __arm__ || __arm64__
1338 if (collect_asid && have_pmap) {
1339 uint32_t asid = PMAP_VASID(task->map->pmap);
1340 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_ASID, sizeof(uint32_t), &out_addr));
1341 stackshot_memcpy((void*)out_addr, &asid, sizeof(asid));
1342 }
1343 #endif
1344
1345 #if MONOTONIC
1346 if (collect_instrs_cycles) {
1347 kcd_exit_on_error(kcdata_record_task_instrs_cycles(kcd, task));
1348 }
1349 #endif /* MONOTONIC */
1350
1351 error_exit:
1352 return error;
1353 }
1354
1355 static kern_return_t
1356 kcdata_record_thread_iostats(kcdata_descriptor_t kcd, thread_t thread)
1357 {
1358 kern_return_t error = KERN_SUCCESS;
1359 mach_vm_address_t out_addr = 0;
1360
1361 /* I/O Statistics */
1362 assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
1363 if (thread->thread_io_stats && !memory_iszero(thread->thread_io_stats, sizeof(struct io_stat_info))) {
1364 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_IOSTATS, sizeof(struct io_stats_snapshot), &out_addr));
1365 struct io_stats_snapshot *_iostat = (struct io_stats_snapshot *)out_addr;
1366 _iostat->ss_disk_reads_count = thread->thread_io_stats->disk_reads.count;
1367 _iostat->ss_disk_reads_size = thread->thread_io_stats->disk_reads.size;
1368 _iostat->ss_disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
1369 _iostat->ss_disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
1370 _iostat->ss_paging_count = thread->thread_io_stats->paging.count;
1371 _iostat->ss_paging_size = thread->thread_io_stats->paging.size;
1372 _iostat->ss_non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
1373 _iostat->ss_non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
1374 _iostat->ss_metadata_count = thread->thread_io_stats->metadata.count;
1375 _iostat->ss_metadata_size = thread->thread_io_stats->metadata.size;
1376 _iostat->ss_data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
1377 _iostat->ss_data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
1378 for (int i = 0; i < IO_NUM_PRIORITIES; i++) {
1379 _iostat->ss_io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
1380 _iostat->ss_io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
1381 }
1382 }
1383
1384 error_exit:
1385 return error;
1386 }
1387
1388 static kern_return_t
1389 kcdata_record_thread_snapshot(
1390 kcdata_descriptor_t kcd, thread_t thread, task_t task, uint32_t trace_flags, boolean_t have_pmap, boolean_t thread_on_core)
1391 {
1392 boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0);
1393 boolean_t active_kthreads_only_p = ((trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
1394 boolean_t trace_fp_p = false;
1395 boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
1396 boolean_t collect_iostats = !collect_delta_stackshot && !(trace_flags & STACKSHOT_NO_IO_STATS);
1397 #if MONOTONIC
1398 boolean_t collect_instrs_cycles = ((trace_flags & STACKSHOT_INSTRS_CYCLES) != 0);
1399 #endif /* MONOTONIC */
1400
1401 kern_return_t error = KERN_SUCCESS;
1402 mach_vm_address_t out_addr = 0;
1403 int saved_count = 0;
1404
1405 struct thread_snapshot_v4 * cur_thread_snap = NULL;
1406 char cur_thread_name[STACKSHOT_MAX_THREAD_NAME_SIZE];
1407 uint64_t tval = 0;
1408 const boolean_t is_64bit_data = task_has_64Bit_data(task);
1409
1410 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_SNAPSHOT, sizeof(struct thread_snapshot_v4), &out_addr));
1411 cur_thread_snap = (struct thread_snapshot_v4 *)out_addr;
1412
1413 /* Populate the thread snapshot header */
1414 cur_thread_snap->ths_ss_flags = 0;
1415 cur_thread_snap->ths_thread_id = thread_tid(thread);
1416 cur_thread_snap->ths_wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
1417 cur_thread_snap->ths_continuation = VM_KERNEL_UNSLIDE(thread->continuation);
1418 cur_thread_snap->ths_total_syscalls = thread->syscalls_mach + thread->syscalls_unix;
1419
1420 if (IPC_VOUCHER_NULL != thread->ith_voucher) {
1421 cur_thread_snap->ths_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
1422 } else {
1423 cur_thread_snap->ths_voucher_identifier = 0;
1424 }
1425
1426 cur_thread_snap->ths_dqserialnum = 0;
1427 if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
1428 uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
1429 if (dqkeyaddr != 0) {
1430 uint64_t dqaddr = 0;
1431 boolean_t copyin_ok = kdp_copyin_word(task, dqkeyaddr, &dqaddr, FALSE, NULL);
1432 if (copyin_ok && dqaddr != 0) {
1433 uint64_t dqserialnumaddr = dqaddr + get_task_dispatchqueue_serialno_offset(task);
1434 uint64_t dqserialnum = 0;
1435 copyin_ok = kdp_copyin_word(task, dqserialnumaddr, &dqserialnum, FALSE, NULL);
1436 if (copyin_ok) {
1437 cur_thread_snap->ths_ss_flags |= kHasDispatchSerial;
1438 cur_thread_snap->ths_dqserialnum = dqserialnum;
1439 }
1440
1441 /* try copying in the queue label */
1442 uint64_t label_offs = get_task_dispatchqueue_label_offset(task);
1443 if (label_offs) {
1444 uint64_t dqlabeladdr = dqaddr + label_offs;
1445 uint64_t actual_dqlabeladdr = 0;
1446
1447 copyin_ok = kdp_copyin_word(task, dqlabeladdr, &actual_dqlabeladdr, FALSE, NULL);
1448 if (copyin_ok && actual_dqlabeladdr != 0) {
1449 char label_buf[STACKSHOT_QUEUE_LABEL_MAXSIZE];
1450 int len;
1451
1452 bzero(label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE * sizeof(char));
1453 len = kdp_copyin_string(task, actual_dqlabeladdr, label_buf, STACKSHOT_QUEUE_LABEL_MAXSIZE, FALSE, NULL);
1454 if (len > 0) {
1455 mach_vm_address_t label_addr = 0;
1456 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_DISPATCH_QUEUE_LABEL, len, &label_addr));
1457 stackshot_strlcpy((char*)label_addr, &label_buf[0], len);
1458 }
1459 }
1460 }
1461 }
1462 }
1463 }
1464
1465 tval = safe_grab_timer_value(&thread->user_timer);
1466 cur_thread_snap->ths_user_time = tval;
1467 tval = safe_grab_timer_value(&thread->system_timer);
1468
1469 if (thread->precise_user_kernel_time) {
1470 cur_thread_snap->ths_sys_time = tval;
1471 } else {
1472 cur_thread_snap->ths_user_time += tval;
1473 cur_thread_snap->ths_sys_time = 0;
1474 }
1475
1476 if (thread->thread_tag & THREAD_TAG_MAINTHREAD) {
1477 cur_thread_snap->ths_ss_flags |= kThreadMain;
1478 }
1479 if (thread->effective_policy.thep_darwinbg) {
1480 cur_thread_snap->ths_ss_flags |= kThreadDarwinBG;
1481 }
1482 if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
1483 cur_thread_snap->ths_ss_flags |= kThreadIOPassive;
1484 }
1485 if (thread->suspend_count > 0) {
1486 cur_thread_snap->ths_ss_flags |= kThreadSuspended;
1487 }
1488 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
1489 cur_thread_snap->ths_ss_flags |= kGlobalForcedIdle;
1490 }
1491 if (thread_on_core) {
1492 cur_thread_snap->ths_ss_flags |= kThreadOnCore;
1493 }
1494 if (stackshot_thread_is_idle_worker_unsafe(thread)) {
1495 cur_thread_snap->ths_ss_flags |= kThreadIdleWorker;
1496 }
1497
1498 /* make sure state flags defined in kcdata.h still match internal flags */
1499 static_assert(SS_TH_WAIT == TH_WAIT);
1500 static_assert(SS_TH_SUSP == TH_SUSP);
1501 static_assert(SS_TH_RUN == TH_RUN);
1502 static_assert(SS_TH_UNINT == TH_UNINT);
1503 static_assert(SS_TH_TERMINATE == TH_TERMINATE);
1504 static_assert(SS_TH_TERMINATE2 == TH_TERMINATE2);
1505 static_assert(SS_TH_IDLE == TH_IDLE);
1506
1507 cur_thread_snap->ths_last_run_time = thread->last_run_time;
1508 cur_thread_snap->ths_last_made_runnable_time = thread->last_made_runnable_time;
1509 cur_thread_snap->ths_state = thread->state;
1510 cur_thread_snap->ths_sched_flags = thread->sched_flags;
1511 cur_thread_snap->ths_base_priority = thread->base_pri;
1512 cur_thread_snap->ths_sched_priority = thread->sched_pri;
1513 cur_thread_snap->ths_eqos = thread->effective_policy.thep_qos;
1514 cur_thread_snap->ths_rqos = thread->requested_policy.thrp_qos;
1515 cur_thread_snap->ths_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
1516 thread->requested_policy.thrp_qos_workq_override);
1517 cur_thread_snap->ths_io_tier = proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
1518 cur_thread_snap->ths_thread_t = VM_KERNEL_UNSLIDE_OR_PERM(thread);
1519
1520 static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
1521 static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
1522 cur_thread_snap->ths_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
1523 cur_thread_snap->ths_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
1524
1525 /* if there is thread name then add to buffer */
1526 cur_thread_name[0] = '\0';
1527 proc_threadname_kdp(thread->uthread, cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
1528 if (strnlen(cur_thread_name, STACKSHOT_MAX_THREAD_NAME_SIZE) > 0) {
1529 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_THREAD_NAME, sizeof(cur_thread_name), &out_addr));
1530 stackshot_memcpy((void *)out_addr, (void *)cur_thread_name, sizeof(cur_thread_name));
1531 }
1532
1533 /* record system, user, and runnable times */
1534 time_value_t user_time, system_time, runnable_time;
1535 thread_read_times(thread, &user_time, &system_time, &runnable_time);
1536 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_CPU_TIMES, sizeof(struct stackshot_cpu_times_v2), &out_addr));
1537 struct stackshot_cpu_times_v2 *stackshot_cpu_times = (struct stackshot_cpu_times_v2 *)out_addr;
1538 *stackshot_cpu_times = (struct stackshot_cpu_times_v2){
1539 .user_usec = (uint64_t)user_time.seconds * USEC_PER_SEC + user_time.microseconds,
1540 .system_usec = (uint64_t)system_time.seconds * USEC_PER_SEC + system_time.microseconds,
1541 .runnable_usec = (uint64_t)runnable_time.seconds * USEC_PER_SEC + runnable_time.microseconds,
1542 };
1543
1544 /* Trace user stack, if any */
1545 if (!active_kthreads_only_p && task->active && thread->task->map != kernel_map) {
1546 uint32_t thread_snapshot_flags = 0;
1547
1548 /* Uses 64-bit machine state? */
1549 if (is_64bit_data) {
1550 uint64_t sp = 0;
1551 out_addr = (mach_vm_address_t)kcd_end_address(kcd);
1552 saved_count = machine_trace_thread64(thread, (char *)out_addr, (char *)kcd_max_address(kcd), MAX_FRAMES, TRUE,
1553 trace_fp_p, &thread_snapshot_flags, &sp);
1554 if (saved_count > 0) {
1555 int frame_size = trace_fp_p ? sizeof(struct stack_snapshot_frame64) : sizeof(uint64_t);
1556 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, trace_fp_p ? STACKSHOT_KCTYPE_USER_STACKFRAME64
1557 : STACKSHOT_KCTYPE_USER_STACKLR64,
1558 frame_size, saved_count / frame_size, &out_addr));
1559 cur_thread_snap->ths_ss_flags |= kUser64_p;
1560 }
1561 #if __x86_64__
1562 if (sp) {
1563 // I'm using 8 here and not sizeof(stack_contents) because this
1564 // code would not work if you just made stack_contents bigger.
1565 vm_offset_t kern_virt_addr = machine_trace_thread_get_kva(sp, thread->task->map, &thread_snapshot_flags);
1566 if (kern_virt_addr && (kern_virt_addr % 8) == 0) {
1567 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_USER_STACKTOP, sizeof(struct stack_snapshot_stacktop), &out_addr));
1568 struct stack_snapshot_stacktop *stacktop = (struct stack_snapshot_stacktop *)out_addr;
1569 stacktop->sp = sp;
1570 memcpy(stacktop->stack_contents, (void*) kern_virt_addr, 8);
1571 }
1572 }
1573 #endif
1574 } else {
1575 out_addr = (mach_vm_address_t)kcd_end_address(kcd);
1576 saved_count = machine_trace_thread(thread, (char *)out_addr, (char *)kcd_max_address(kcd), MAX_FRAMES, TRUE, trace_fp_p,
1577 &thread_snapshot_flags);
1578 if (saved_count > 0) {
1579 int frame_size = trace_fp_p ? sizeof(struct stack_snapshot_frame32) : sizeof(uint32_t);
1580 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, trace_fp_p ? STACKSHOT_KCTYPE_USER_STACKFRAME
1581 : STACKSHOT_KCTYPE_USER_STACKLR,
1582 frame_size, saved_count / frame_size, &out_addr));
1583 }
1584 }
1585
1586 if (thread_snapshot_flags != 0) {
1587 cur_thread_snap->ths_ss_flags |= thread_snapshot_flags;
1588 }
1589 }
1590
1591 /* Call through to the machine specific trace routines
1592 * Frames are added past the snapshot header.
1593 */
1594 if (thread->kernel_stack != 0) {
1595 uint32_t thread_snapshot_flags = 0;
1596 #if defined(__LP64__)
1597 out_addr = (mach_vm_address_t)kcd_end_address(kcd);
1598 saved_count = machine_trace_thread64(thread, (char *)out_addr, (char *)kcd_max_address(kcd), MAX_FRAMES, FALSE, trace_fp_p,
1599 &thread_snapshot_flags, NULL);
1600 if (saved_count > 0) {
1601 int frame_size = trace_fp_p ? sizeof(struct stack_snapshot_frame64) : sizeof(uint64_t);
1602 cur_thread_snap->ths_ss_flags |= kKernel64_p;
1603 kcd_exit_on_error(kcdata_get_memory_addr_for_array(kcd, trace_fp_p ? STACKSHOT_KCTYPE_KERN_STACKFRAME64
1604 : STACKSHOT_KCTYPE_KERN_STACKLR64,
1605 frame_size, saved_count / frame_size, &out_addr));
1606 }
1607 #else
1608 out_addr = (mach_vm_address_t)kcd_end_address(kcd);
1609 saved_count = machine_trace_thread(thread, (char *)out_addr, (char *)kcd_max_address(kcd), MAX_FRAMES, FALSE, trace_fp_p,
1610 &thread_snapshot_flags);
1611 if (saved_count > 0) {
1612 int frame_size = trace_fp_p ? sizeof(struct stack_snapshot_frame32) : sizeof(uint32_t);
1613 kcd_exit_on_error(
1614 kcdata_get_memory_addr_for_array(kcd, trace_fp_p ? STACKSHOT_KCTYPE_KERN_STACKFRAME : STACKSHOT_KCTYPE_KERN_STACKLR,
1615 frame_size, saved_count / frame_size, &out_addr));
1616 }
1617 #endif
1618 if (thread_snapshot_flags != 0) {
1619 cur_thread_snap->ths_ss_flags |= thread_snapshot_flags;
1620 }
1621 }
1622
1623
1624 if (collect_iostats) {
1625 kcd_exit_on_error(kcdata_record_thread_iostats(kcd, thread));
1626 }
1627
1628 #if MONOTONIC
1629 if (collect_instrs_cycles) {
1630 uint64_t instrs = 0, cycles = 0;
1631 mt_stackshot_thread(thread, &instrs, &cycles);
1632
1633 kcd_exit_on_error(kcdata_get_memory_addr(kcd, STACKSHOT_KCTYPE_INSTRS_CYCLES, sizeof(struct instrs_cycles_snapshot), &out_addr));
1634 struct instrs_cycles_snapshot *instrs_cycles = (struct instrs_cycles_snapshot *)out_addr;
1635 instrs_cycles->ics_instructions = instrs;
1636 instrs_cycles->ics_cycles = cycles;
1637 }
1638 #endif /* MONOTONIC */
1639
1640 error_exit:
1641 return error;
1642 }
1643
1644 static int
1645 kcdata_record_thread_delta_snapshot(struct thread_delta_snapshot_v3 * cur_thread_snap, thread_t thread, boolean_t thread_on_core)
1646 {
1647 cur_thread_snap->tds_thread_id = thread_tid(thread);
1648 if (IPC_VOUCHER_NULL != thread->ith_voucher) {
1649 cur_thread_snap->tds_voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
1650 } else {
1651 cur_thread_snap->tds_voucher_identifier = 0;
1652 }
1653
1654 cur_thread_snap->tds_ss_flags = 0;
1655 if (thread->effective_policy.thep_darwinbg) {
1656 cur_thread_snap->tds_ss_flags |= kThreadDarwinBG;
1657 }
1658 if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
1659 cur_thread_snap->tds_ss_flags |= kThreadIOPassive;
1660 }
1661 if (thread->suspend_count > 0) {
1662 cur_thread_snap->tds_ss_flags |= kThreadSuspended;
1663 }
1664 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
1665 cur_thread_snap->tds_ss_flags |= kGlobalForcedIdle;
1666 }
1667 if (thread_on_core) {
1668 cur_thread_snap->tds_ss_flags |= kThreadOnCore;
1669 }
1670 if (stackshot_thread_is_idle_worker_unsafe(thread)) {
1671 cur_thread_snap->tds_ss_flags |= kThreadIdleWorker;
1672 }
1673
1674 cur_thread_snap->tds_last_made_runnable_time = thread->last_made_runnable_time;
1675 cur_thread_snap->tds_state = thread->state;
1676 cur_thread_snap->tds_sched_flags = thread->sched_flags;
1677 cur_thread_snap->tds_base_priority = thread->base_pri;
1678 cur_thread_snap->tds_sched_priority = thread->sched_pri;
1679 cur_thread_snap->tds_eqos = thread->effective_policy.thep_qos;
1680 cur_thread_snap->tds_rqos = thread->requested_policy.thrp_qos;
1681 cur_thread_snap->tds_rqos_override = MAX(thread->requested_policy.thrp_qos_override,
1682 thread->requested_policy.thrp_qos_workq_override);
1683 cur_thread_snap->tds_io_tier = proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
1684
1685 static_assert(sizeof(thread->effective_policy) == sizeof(uint64_t));
1686 static_assert(sizeof(thread->requested_policy) == sizeof(uint64_t));
1687 cur_thread_snap->tds_requested_policy = *(unaligned_u64 *) &thread->requested_policy;
1688 cur_thread_snap->tds_effective_policy = *(unaligned_u64 *) &thread->effective_policy;
1689
1690 return 0;
1691 }
1692
1693 /*
1694 * Why 12? 12 strikes a decent balance between allocating a large array on
1695 * the stack and having large kcdata item overheads for recording nonrunnable
1696 * tasks.
1697 */
1698 #define UNIQUEIDSPERFLUSH 12
1699
/*
 * Fixed-capacity batch of 64-bit ids, sized by UNIQUEIDSPERFLUSH.
 * NOTE(review): `count` is presumably the number of valid entries in `ids`;
 * no user of this struct is visible in this part of the file - confirm.
 */
1700 struct saved_uniqueids {
1701 uint64_t ids[UNIQUEIDSPERFLUSH];
1702 unsigned count;
1703 };
1704
/*
 * Result of classify_thread(): which kind of record a thread gets in the
 * stackshot being generated.
 */
1705 enum thread_classification {
1706 tc_full_snapshot, /* take a full snapshot */
1707 tc_delta_snapshot, /* take a delta snapshot */
1708 };
1709
1710 static enum thread_classification
1711 classify_thread(thread_t thread, boolean_t * thread_on_core_p, uint32_t trace_flags)
1712 {
1713 boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
1714
1715 processor_t last_processor = thread->last_processor;
1716
1717 boolean_t thread_on_core =
1718 (last_processor != PROCESSOR_NULL &&
1719 (last_processor->state == PROCESSOR_SHUTDOWN || last_processor->state == PROCESSOR_RUNNING) &&
1720 last_processor->active_thread == thread);
1721
1722 *thread_on_core_p = thread_on_core;
1723
1724 /* Capture the full thread snapshot if this is not a delta stackshot or if the thread has run subsequent to the
1725 * previous full stackshot */
1726 if (!collect_delta_stackshot || thread_on_core || (thread->last_run_time > stack_snapshot_delta_since_timestamp)) {
1727 return tc_full_snapshot;
1728 } else {
1729 return tc_delta_snapshot;
1730 }
1731 }
1732
/*
 * Per-stackshot parameters handed to kdp_stackshot_record_task().
 */
1733 struct stackshot_context {
/* pid to record; kdp_stackshot_record_task() treats -1 as "every task" */
1734 int pid;
/* STACKSHOT_* flag bits for this stackshot */
1735 uint32_t trace_flags;
1736 };
1737
/*
 * Append one task's data to the global stackshot buffer (stackshot_kcdata_p).
 *
 * ctx  - pid filter and trace flags for this stackshot.
 * task - task to record; validated with ml_validate_nofault() before use.
 *
 * Returns KERN_SUCCESS when the task was recorded or deliberately skipped,
 * KERN_FAILURE when the task or one of its threads fails validation, or the
 * error reported by a kcdata call.
 *
 * NOTE(review): kcd_exit_on_error() is defined outside this view; it
 * presumably stores the call's result in `error` and jumps to error_exit on
 * failure - confirm against its definition.
 */
1738 static kern_return_t
1739 kdp_stackshot_record_task(struct stackshot_context *ctx, task_t task)
1740 {
/* Decode the flags this function cares about. */
1741 boolean_t active_kthreads_only_p = ((ctx->trace_flags & STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY) != 0);
1742 boolean_t save_donating_pids_p = ((ctx->trace_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0);
1743 boolean_t collect_delta_stackshot = ((ctx->trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
1744 boolean_t save_owner_info = ((ctx->trace_flags & STACKSHOT_THREAD_WAITINFO) != 0);
1745
1746
1747 kern_return_t error = KERN_SUCCESS;
1748 mach_vm_address_t out_addr = 0;
1749 int saved_count = 0;
1750
1751 int task_pid = 0;
1752 uint64_t task_uniqueid = 0;
1753 int num_delta_thread_snapshots = 0;
/* NOTE(review): num_nonrunnable_threads is never incremented in this function, so the
 * STACKSHOT_KCTYPE_NONRUNNABLE_TIDS allocation below cannot trigger as written. */
1754 int num_nonrunnable_threads = 0;
1755 int num_waitinfo_threads = 0;
1756 int num_turnstileinfo_threads = 0;
1757
1758 uint64_t task_start_abstime = 0;
1759 boolean_t task_delta_stackshot = FALSE;
1760 boolean_t have_map = FALSE, have_pmap = FALSE;
/* NOTE(review): some_thread_ran is set below but never read in this function. */
1761 boolean_t some_thread_ran = FALSE;
1762 unaligned_u64 *task_snap_ss_flags = NULL;
1763
1764 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
1765 uint64_t task_begin_cpu_cycle_count = 0;
1766 if (!panic_stackshot) {
1767 task_begin_cpu_cycle_count = mt_cur_cpu_cycles();
1768 }
1769 #endif
1770
/* Refuse to touch a task structure that is NULL or fails validation. */
1771 if ((task == NULL) || !ml_validate_nofault((vm_offset_t)task, sizeof(struct task))) {
1772 error = KERN_FAILURE;
1773 goto error_exit;
1774 }
1775
/* have_pmap is only true when both the map and its pmap are non-NULL and pass validation. */
1776 have_map = (task->map != NULL) && (ml_validate_nofault((vm_offset_t)(task->map), sizeof(struct _vm_map)));
1777 have_pmap = have_map && (task->map->pmap != NULL) && (ml_validate_nofault((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));
1778
1779 task_pid = pid_from_task(task);
1780 task_uniqueid = get_task_uniqueid(task);
1781
1782 if (!task->active || task_is_a_corpse(task)) {
1783 /*
1784 * Not interested in terminated tasks without threads, and
1785 * at the moment, stackshot can't handle a task without a name.
1786 */
1787 if (queue_empty(&task->threads) || task_pid == -1) {
1788 return KERN_SUCCESS;
1789 }
1790 }
1791
/* The task start time is only needed to decide between full and delta task records. */
1792 if (collect_delta_stackshot) {
1793 proc_starttime_kdp(task->bsd_info, NULL, NULL, &task_start_abstime);
1794 }
1795
1796 /* Trace everything, unless a process was specified */
1797 if ((ctx->pid == -1) || (ctx->pid == task_pid)) {
1798 /* add task snapshot marker */
1799 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
1800 STACKSHOT_KCCONTAINER_TASK, task_uniqueid));
1801
/* Full task record unless this is a delta stackshot of a task with a known start time
 * that is not later than the delta baseline timestamp. */
1802 if (!collect_delta_stackshot || (task_start_abstime == 0) ||
1803 (task_start_abstime > stack_snapshot_delta_since_timestamp)) {
1804 kcd_exit_on_error(kcdata_record_task_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, &task_snap_ss_flags));
1805 } else {
1806 task_delta_stackshot = TRUE;
1807 kcd_exit_on_error(kcdata_record_task_delta_snapshot(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, &task_snap_ss_flags));
1808 }
1809
/* Pass 1: emit full thread snapshots immediately; only count the threads that need
 * delta, wait-info and turnstile-info records so their arrays can be sized. */
1810 /* Iterate over task threads */
1811 thread_t thread = THREAD_NULL;
1812 queue_iterate(&task->threads, thread, thread_t, task_threads)
1813 {
1814 uint64_t thread_uniqueid;
1815
1816 if ((thread == NULL) || !ml_validate_nofault((vm_offset_t)thread, sizeof(struct thread))) {
1817 error = KERN_FAILURE;
1818 goto error_exit;
1819 }
1820
1821 if (active_kthreads_only_p && thread->kernel_stack == 0) {
1822 continue;
1823 }
1824
1825 thread_uniqueid = thread_tid(thread);
1826
1827 boolean_t thread_on_core;
1828 enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, ctx->trace_flags);
1829
1830 switch (thread_classification) {
1831 case tc_full_snapshot:
1832 /* add thread marker */
1833 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_BEGIN,
1834 STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
1835 kcd_exit_on_error(
1836 kcdata_record_thread_snapshot(stackshot_kcdata_p, thread, task, ctx->trace_flags, have_pmap, thread_on_core));
1837
1838 /* mark end of thread snapshot data */
1839 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END,
1840 STACKSHOT_KCCONTAINER_THREAD, thread_uniqueid));
1841
1842 some_thread_ran = TRUE;
1843 break;
1844
1845 case tc_delta_snapshot:
1846 num_delta_thread_snapshots++;
1847 break;
1848 }
1849
1850 /* We want to report owner information regardless of whether a thread
1851 * has changed since the last delta, whether it's a normal stackshot,
1852 * or whether it's nonrunnable */
1853 if (save_owner_info) {
1854 if (stackshot_thread_has_valid_waitinfo(thread)) {
1855 num_waitinfo_threads++;
1856 }
1857
1858 if (stackshot_thread_has_valid_turnstileinfo(thread)) {
1859 num_turnstileinfo_threads++;
1860 }
1861 }
1862 }
1863
/* Reserve one array per record kind, sized from the counts gathered in pass 1. */
1864 struct thread_delta_snapshot_v3 * delta_snapshots = NULL;
1865 int current_delta_snapshot_index = 0;
1866
1867 if (num_delta_thread_snapshots > 0) {
1868 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_DELTA_SNAPSHOT,
1869 sizeof(struct thread_delta_snapshot_v3),
1870 num_delta_thread_snapshots, &out_addr));
1871 delta_snapshots = (struct thread_delta_snapshot_v3 *)out_addr;
1872 }
1873
1874 uint64_t * nonrunnable_tids = NULL;
1875
1876 if (num_nonrunnable_threads > 0) {
1877 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_NONRUNNABLE_TIDS,
1878 sizeof(uint64_t), num_nonrunnable_threads, &out_addr));
1879 nonrunnable_tids = (uint64_t *)out_addr;
1880 }
1881
1882 thread_waitinfo_t *thread_waitinfo = NULL;
1883 thread_turnstileinfo_t *thread_turnstileinfo = NULL;
1884 int current_waitinfo_index = 0;
1885 int current_turnstileinfo_index = 0;
1886
1887 if (num_waitinfo_threads > 0) {
1888 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_WAITINFO,
1889 sizeof(thread_waitinfo_t), num_waitinfo_threads, &out_addr));
1890 thread_waitinfo = (thread_waitinfo_t *)out_addr;
1891 }
1892
1893 if (num_turnstileinfo_threads > 0) {
1894 /* get space for the turnstile info */
1895 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_TURNSTILEINFO,
1896 sizeof(thread_turnstileinfo_t), num_turnstileinfo_threads, &out_addr));
1897 thread_turnstileinfo = (thread_turnstileinfo_t *)out_addr;
1898 }
1899
/* Pass 2: walk the same thread list again and fill the arrays reserved above. The
 * indices stay in bounds only if each predicate answers as it did in pass 1. */
1900 if (num_delta_thread_snapshots > 0 || num_nonrunnable_threads > 0 ||
1901 num_waitinfo_threads > 0 || num_turnstileinfo_threads > 0) {
1902 queue_iterate(&task->threads, thread, thread_t, task_threads)
1903 {
1904 if (active_kthreads_only_p && thread->kernel_stack == 0) {
1905 continue;
1906 }
1907
1908 /* If we want owner info, we should capture it regardless of its classification */
1909 if (save_owner_info) {
1910 if (stackshot_thread_has_valid_waitinfo(thread)) {
1911 stackshot_thread_wait_owner_info(
1912 thread,
1913 &thread_waitinfo[current_waitinfo_index++]);
1914 }
1915
1916 if (stackshot_thread_has_valid_turnstileinfo(thread)) {
1917 stackshot_thread_turnstileinfo(
1918 thread,
1919 &thread_turnstileinfo[current_turnstileinfo_index++]);
1920 }
1921 }
1922
1923 boolean_t thread_on_core;
1924 enum thread_classification thread_classification = classify_thread(thread, &thread_on_core, ctx->trace_flags);
1925
1926 switch (thread_classification) {
1927 case tc_full_snapshot:
1928 /* full thread snapshot captured above */
1929 continue;
1930
1931 case tc_delta_snapshot:
1932 kcd_exit_on_error(kcdata_record_thread_delta_snapshot(&delta_snapshots[current_delta_snapshot_index++],
1933 thread, thread_on_core));
1934 break;
1935 }
1936 }
1937
/* Debug-only cross-check that pass 2 filled exactly what pass 1 counted.
 * NOTE(review): current_turnstileinfo_index is not checked against num_turnstileinfo_threads. */
1938 #if DEBUG || DEVELOPMENT
1939 if (current_delta_snapshot_index != num_delta_thread_snapshots) {
1940 panic("delta thread snapshot count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
1941 num_delta_thread_snapshots, current_delta_snapshot_index);
1942 }
1943 if (current_waitinfo_index != num_waitinfo_threads) {
1944 panic("thread wait info count mismatch while capturing snapshots for task %p. expected %d, found %d", task,
1945 num_waitinfo_threads, current_waitinfo_index);
1946 }
1947 #endif
1948 }
1949
1950 #if IMPORTANCE_INHERITANCE
1951 if (save_donating_pids_p) {
/* First make sure TASK_IMP_WALK_LIMIT int32_t entries fit between the current end of the
 * buffer and its maximum address, then let task_importance_list_pids() write pids at the
 * current end and claim space for the saved_count entries it reported. */
1952 kcd_exit_on_error(
1953 ((((mach_vm_address_t)kcd_end_address(stackshot_kcdata_p) + (TASK_IMP_WALK_LIMIT * sizeof(int32_t))) <
1954 (mach_vm_address_t)kcd_max_address(stackshot_kcdata_p))
1955 ? KERN_SUCCESS
1956 : KERN_RESOURCE_SHORTAGE));
1957 saved_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS,
1958 (void *)kcd_end_address(stackshot_kcdata_p), TASK_IMP_WALK_LIMIT);
1959 if (saved_count > 0) {
1960 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_DONATING_PIDS,
1961 sizeof(int32_t), saved_count, &out_addr));
1962 }
1963 }
1964 #endif
1965
1966 if (!collect_delta_stackshot || (num_delta_thread_snapshots != task->thread_count) || !task_delta_stackshot) {
1967 /*
1968 * Collect shared cache info and UUID info in these scenarios
1969 * 1) a full stackshot
1970 * 2) a delta stackshot where the task started after the previous full stackshot OR
1971 * any thread from the task has run since the previous full stackshot
1972 */
1973
1974 kcd_exit_on_error(kcdata_record_shared_cache_info(stackshot_kcdata_p, task, task_snap_ss_flags));
1975 kcd_exit_on_error(kcdata_record_uuid_info(stackshot_kcdata_p, task, ctx->trace_flags, have_pmap, task_snap_ss_flags));
1976 }
1977
/* Outside panic stackshots, record the cycle-counter delta accumulated since this function started. */
1978 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
1979 if (!panic_stackshot) {
1980 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - task_begin_cpu_cycle_count),
1981 "task_cpu_cycle_count"));
1982 }
1983 #endif
1984 /* mark end of task snapshot data */
1985 kcd_exit_on_error(kcdata_add_container_marker(stackshot_kcdata_p, KCDATA_TYPE_CONTAINER_END, STACKSHOT_KCCONTAINER_TASK,
1986 task_uniqueid));
1987 }
1988
1989 error_exit:
1990 return error;
1991 }
1992
1993
/*
 * Generate a complete stackshot into the global buffer stackshot_kcdata_p.
 *
 * pid          - pid to record; passed through to kdp_stackshot_record_task(), which
 *                treats -1 as "all tasks".
 * trace_flags  - STACKSHOT_* flag bits selecting what is collected.
 * pBytesTraced - out: set to 0 once the arguments are accepted, and to the number of bytes
 *                used in the buffer only when every step succeeded.
 *
 * Returns KERN_SUCCESS, KERN_INVALID_ARGUMENT when stackshot_kcdata_p or pBytesTraced is
 * NULL, KERN_FAILURE when the coalition snapshot walk fails, or the first error reported by
 * a kcdata call or by kdp_stackshot_record_task().
 *
 * NOTE(review): kcd_exit_on_error() is defined outside this view; it presumably stores the
 * call's result in `error` and jumps to error_exit on failure - confirm.
 */
1994 static kern_return_t
1995 kdp_stackshot_kcdata_format(int pid, uint32_t trace_flags, uint32_t * pBytesTraced)
1996 {
1997 kern_return_t error = KERN_SUCCESS;
1998 mach_vm_address_t out_addr = 0;
1999 uint64_t abs_time = 0, abs_time_end = 0;
2000 uint64_t *abs_time_addr = NULL;
2001 uint64_t system_state_flags = 0;
2002 task_t task = TASK_NULL;
2003 mach_timebase_info_data_t timebase = {0, 0};
2004 uint32_t length_to_copy = 0, tmp32 = 0;
/* Start time of the stackshot; used for the duration computed at the end. */
2005 abs_time = mach_absolute_time();
2006
2007 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2008 uint64_t stackshot_begin_cpu_cycle_count = 0;
2009
2010 if (!panic_stackshot) {
2011 stackshot_begin_cpu_cycle_count = mt_cur_cpu_cycles();
2012 }
2013 #endif
2014
2015 /* process the flags */
2016 boolean_t collect_delta_stackshot = ((trace_flags & STACKSHOT_COLLECT_DELTA_SNAPSHOT) != 0);
2017 boolean_t use_fault_path = ((trace_flags & (STACKSHOT_ENABLE_UUID_FAULTING | STACKSHOT_ENABLE_BT_FAULTING)) != 0);
/* Global switch; reset to FALSE on every exit path at the bottom of this function. */
2018 stack_enable_faulting = (trace_flags & (STACKSHOT_ENABLE_BT_FAULTING));
2019
2020 #if CONFIG_EMBEDDED
2021 /* KEXTs can't be described by just a base address on embedded */
2022 trace_flags &= ~(STACKSHOT_SAVE_KEXT_LOADINFO);
2023 #endif
2024
/* NOTE(review): ctx.trace_flags is captured here, before STACKSHOT_SAVE_JETSAM_COALITIONS and
 * STACKSHOT_THREAD_GROUP may be cleared from the local trace_flags further down, so the
 * per-task code still sees those bits while "stackshot_out_flags" reports the pruned value. */
2025 struct stackshot_context ctx = {};
2026 ctx.trace_flags = trace_flags;
2027 ctx.pid = pid;
2028
/* Fault statistics are reset only when one of the faulting flags was requested. */
2029 if (use_fault_path) {
2030 fault_stats.sfs_pages_faulted_in = 0;
2031 fault_stats.sfs_time_spent_faulting = 0;
2032 fault_stats.sfs_stopped_faulting = (uint8_t) FALSE;
2033 }
2034
2035 if (sizeof(void *) == 8) {
2036 system_state_flags |= kKernel64_p;
2037 }
2038
2039 if (stackshot_kcdata_p == NULL || pBytesTraced == NULL) {
2040 error = KERN_INVALID_ARGUMENT;
2041 goto error_exit;
2042 }
2043
2044 /* setup mach_absolute_time and timebase info -- copy out in some cases and needed to convert since_timestamp to seconds for proc start time */
2045 clock_timebase_info(&timebase);
2046
2047 /* begin saving data into the buffer */
2048 *pBytesTraced = 0;
2049 kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, trace_flags, "stackshot_in_flags"));
2050 kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, (uint32_t)pid, "stackshot_in_pid"));
2051 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, system_state_flags, "system_state_flags"));
2052
2053 #if CONFIG_JETSAM
2054 tmp32 = memorystatus_get_pressure_status_kdp();
2055 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_LEVEL, sizeof(uint32_t), &out_addr));
2056 stackshot_memcpy((void *)out_addr, &tmp32, sizeof(tmp32));
2057 #endif
2058
/* Full stackshots carry the static system description (policy struct version, page size,
 * OS version, boot-args, timebase); delta stackshots record only the baseline timestamp. */
2059 if (!collect_delta_stackshot) {
2060 tmp32 = THREAD_POLICY_INTERNAL_STRUCT_VERSION;
2061 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_THREAD_POLICY_VERSION, sizeof(uint32_t), &out_addr));
2062 stackshot_memcpy((void *)out_addr, &tmp32, sizeof(tmp32));
2063
2064 tmp32 = PAGE_SIZE;
2065 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_KERN_PAGE_SIZE, sizeof(uint32_t), &out_addr));
2066 stackshot_memcpy((void *)out_addr, &tmp32, sizeof(tmp32));
2067
2068 /* save boot-args and osversion string */
/* Each string is copied with its terminator and capped at a fixed maximum length. */
2069 length_to_copy = MIN((uint32_t)(strlen(version) + 1), OSVERSIZE);
2070 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_OSVERSION, length_to_copy, &out_addr));
2071 stackshot_strlcpy((char*)out_addr, &version[0], length_to_copy);
2072
2073 length_to_copy = MIN((uint32_t)(strlen(PE_boot_args()) + 1), BOOT_LINE_LENGTH);
2074 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_BOOTARGS, length_to_copy, &out_addr));
2075 stackshot_strlcpy((char*)out_addr, PE_boot_args(), length_to_copy);
2076
2077 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, KCDATA_TYPE_TIMEBASE, sizeof(timebase), &out_addr));
2078 stackshot_memcpy((void *)out_addr, &timebase, sizeof(timebase));
2079 } else {
2080 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_DELTA_SINCE_TIMESTAMP, sizeof(uint64_t), &out_addr));
2081 stackshot_memcpy((void*)out_addr, &stack_snapshot_delta_since_timestamp, sizeof(stack_snapshot_delta_since_timestamp));
2082 }
2083
/* The absolute-time slot is filled with the start time now and overwritten with the end
 * time once all tasks have been recorded; abs_time_addr remembers where it lives. */
2084 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, KCDATA_TYPE_MACH_ABSOLUTE_TIME, sizeof(uint64_t), &out_addr));
2085 abs_time_addr = (uint64_t *)out_addr;
2086 stackshot_memcpy((void *)abs_time_addr, &abs_time, sizeof(uint64_t));
2087
2088 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, KCDATA_TYPE_USECS_SINCE_EPOCH, sizeof(uint64_t), &out_addr));
2089 stackshot_memcpy((void *)out_addr, &stackshot_microsecs, sizeof(uint64_t));
2090
2091 /* record system level shared cache load info (if available) */
2092 if (!collect_delta_stackshot && init_task_shared_region &&
2093 ml_validate_nofault((vm_offset_t)init_task_shared_region, sizeof(struct vm_shared_region))) {
2094 struct dyld_uuid_info_64_v2 *sys_shared_cache_info = NULL;
2095 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_SHAREDCACHE_LOADINFO,
2096 sizeof(struct dyld_uuid_info_64_v2), &out_addr));
2097 sys_shared_cache_info = (struct dyld_uuid_info_64_v2 *)out_addr;
2098
2099 stackshot_memcpy(sys_shared_cache_info->imageUUID, &init_task_shared_region->sr_uuid, sizeof(init_task_shared_region->sr_uuid));
/* Note: imageLoadAddress receives the slide itself; the slid base goes in imageSlidBaseAddress. */
2100 sys_shared_cache_info->imageLoadAddress = init_task_shared_region->sr_slide_info.slide;
2101 sys_shared_cache_info->imageSlidBaseAddress = init_task_shared_region->sr_slide_info.slide + init_task_shared_region->sr_base_address;
2102
2103 if (trace_flags & STACKSHOT_COLLECT_SHAREDCACHE_LAYOUT) {
2104 /*
2105 * Include a map of the system shared cache layout if it has been populated
2106 * (which is only when the system is using a custom shared cache).
2107 */
2108 if (init_task_shared_region->sr_images && ml_validate_nofault((vm_offset_t)init_task_shared_region->sr_images,
2109 (init_task_shared_region->sr_images_count * sizeof(struct dyld_uuid_info_64)))) {
2110 assert(init_task_shared_region->sr_images_count != 0);
2111 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_SYS_SHAREDCACHE_LAYOUT,
2112 sizeof(struct dyld_uuid_info_64),
2113 init_task_shared_region->sr_images_count, &out_addr));
2114 stackshot_memcpy((void*)out_addr, init_task_shared_region->sr_images,
2115 (init_task_shared_region->sr_images_count * sizeof(struct dyld_uuid_info_64)));
2116 }
2117 }
2118 }
2119
2120 /* Add requested information first */
2121 if (trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
2122 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_GLOBAL_MEM_STATS, sizeof(struct mem_and_io_snapshot), &out_addr));
2123 kdp_mem_and_io_snapshot((struct mem_and_io_snapshot *)out_addr);
2124 }
2125
2126 #if CONFIG_COALITIONS
2127 int num_coalitions = 0;
2128 struct jetsam_coalition_snapshot *coalitions = NULL;
2129
2130 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2131 uint64_t coalition_begin_cpu_cycle_count = 0;
2132
2133 if (!panic_stackshot && (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS)) {
2134 coalition_begin_cpu_cycle_count = mt_cur_cpu_cycles();
2135 }
2136 #endif
2137
/* Two walks: the first counts jetsam coalitions (a failure just drops the flag), the second
 * fills the array sized from that count (a failure aborts the stackshot). */
2138 /* Iterate over coalitions */
2139 if (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
2140 if (coalition_iterate_stackshot(stackshot_coalition_jetsam_count, &num_coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
2141 trace_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
2142 }
2143 }
2144 if (trace_flags & STACKSHOT_SAVE_JETSAM_COALITIONS) {
2145 if (num_coalitions > 0) {
2146 kcd_exit_on_error(kcdata_get_memory_addr_for_array(stackshot_kcdata_p, STACKSHOT_KCTYPE_JETSAM_COALITION_SNAPSHOT, sizeof(struct jetsam_coalition_snapshot), num_coalitions, &out_addr));
2147 coalitions = (struct jetsam_coalition_snapshot*)out_addr;
2148 }
2149
2150 if (coalition_iterate_stackshot(stackshot_coalition_jetsam_snapshot, coalitions, COALITION_TYPE_JETSAM) != KERN_SUCCESS) {
2151 error = KERN_FAILURE;
2152 goto error_exit;
2153 }
2154 }
2155 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2156 if (!panic_stackshot && (coalition_begin_cpu_cycle_count != 0)) {
2157 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - coalition_begin_cpu_cycle_count),
2158 "coalitions_cpu_cycle_count"));
2159 }
2160 #endif
2161 #else
2162 trace_flags &= ~(STACKSHOT_SAVE_JETSAM_COALITIONS);
2163 #endif /* CONFIG_COALITIONS */
2164
/* The thread-group flag is unconditionally cleared in this view, so it never appears in the out flags. */
2165 trace_flags &= ~(STACKSHOT_THREAD_GROUP);
2166
2167
2168 /* Iterate over tasks */
2169 queue_iterate(&tasks, task, task_t, tasks)
2170 {
2171 error = kdp_stackshot_record_task(&ctx, task);
2172 if (error) {
2173 goto error_exit;
2174 }
2175 }
2176 /*
2177 * Iterate over the tasks in the terminated tasks list. We only inspect
2178 * tasks that have a valid bsd_info pointer where P_LPEXIT is NOT set.
2179 * We're only interested in tasks that have remaining threads (which
2180 * could be involved in a deadlock, etc), and the last thread that tears
2181 * itself down during exit sets P_LPEXIT during proc_exit().
2182 */
2183 queue_iterate(&terminated_tasks, task, task_t, tasks)
2184 {
2185 if (task->bsd_info && !proc_in_teardown(task->bsd_info)) {
2186 error = kdp_stackshot_record_task(&ctx, task);
2187 if (error) {
2188 goto error_exit;
2189 }
2190 }
2191 }
2192
/* Publish the fault statistics accumulated while recording tasks. */
2193 if (use_fault_path) {
2194 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS,
2195 sizeof(struct stackshot_fault_stats), &out_addr));
2196 stackshot_memcpy((void*)out_addr, &fault_stats, sizeof(struct stackshot_fault_stats));
2197 }
2198
2199 /* update timestamp of the stackshot */
2200 abs_time_end = mach_absolute_time();
2201 #if DEVELOPMENT || DEBUG
/* The inner duration is recorded here; the outer duration is zeroed and its address is
 * saved in the global stackshot_duration_outer, which is not filled in by this function. */
2202 kcd_exit_on_error(kcdata_get_memory_addr(stackshot_kcdata_p, STACKSHOT_KCTYPE_STACKSHOT_DURATION,
2203 sizeof(struct stackshot_duration), &out_addr));
2204 struct stackshot_duration * stackshot_duration = (struct stackshot_duration *)out_addr;
2205 stackshot_duration->stackshot_duration = (abs_time_end - abs_time);
2206 stackshot_duration->stackshot_duration_outer = 0;
2207 stackshot_duration_outer = (unaligned_u64 *)&stackshot_duration->stackshot_duration_outer;
2208 #endif
/* Overwrite the start time stored earlier in the absolute-time slot with the end time. */
2209 stackshot_memcpy((void *)abs_time_addr, &abs_time_end, sizeof(uint64_t));
2210
2211 kcd_exit_on_error(kcdata_add_uint32_with_description(stackshot_kcdata_p, trace_flags, "stackshot_out_flags"));
2212
2213 #if INTERRUPT_MASKED_DEBUG && MONOTONIC
2214 if (!panic_stackshot) {
2215 kcd_exit_on_error(kcdata_add_uint64_with_description(stackshot_kcdata_p, (mt_cur_cpu_cycles() - stackshot_begin_cpu_cycle_count),
2216 "stackshot_total_cpu_cycle_cnt"));
2217 }
2218 #endif
2219
2220 kcd_exit_on_error(kcdata_write_buffer_end(stackshot_kcdata_p));
2221
2222 /* === END of populating stackshot data === */
2223
/* Only reached on success: error paths jump past this and leave *pBytesTraced untouched. */
2224 *pBytesTraced = (uint32_t) kcdata_memory_get_used_bytes(stackshot_kcdata_p);
2225 error_exit:
2226
2227 #if INTERRUPT_MASKED_DEBUG
2228 if (!panic_stackshot) {
2229 /*
2230 * Try to catch instances where stackshot takes too long BEFORE returning from
2231 * the debugger
2232 */
2233 ml_check_interrupts_disabled_duration(current_thread());
2234 }
2235 #endif
2236
2237 stack_enable_faulting = FALSE;
2238
2239 return error;
2240 }
2241
2242 static uint64_t
2243 proc_was_throttled_from_task(task_t task)
2244 {
2245 uint64_t was_throttled = 0;
2246
2247 if (task->bsd_info) {
2248 was_throttled = proc_was_throttled(task->bsd_info);
2249 }
2250
2251 return was_throttled;
2252 }
2253
2254 static uint64_t
2255 proc_did_throttle_from_task(task_t task)
2256 {
2257 uint64_t did_throttle = 0;
2258
2259 if (task->bsd_info) {
2260 did_throttle = proc_did_throttle(task->bsd_info);
2261 }
2262
2263 return did_throttle;
2264 }
2265
2266 static void
2267 kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
2268 {
2269 unsigned int pages_reclaimed;
2270 unsigned int pages_wanted;
2271 kern_return_t kErr;
2272
2273 processor_t processor;
2274 vm_statistics64_t stat;
2275 vm_statistics64_data_t host_vm_stat;
2276
2277 processor = processor_list;
2278 stat = &PROCESSOR_DATA(processor, vm_stat);
2279 host_vm_stat = *stat;
2280
2281 if (processor_count > 1) {
2282 /*
2283 * processor_list may be in the process of changing as we are
2284 * attempting a stackshot. Ordinarily it will be lock protected,
2285 * but it is not safe to lock in the context of the debugger.
2286 * Fortunately we never remove elements from the processor list,
2287 * and only add to to the end of the list, so we SHOULD be able
2288 * to walk it. If we ever want to truly tear down processors,
2289 * this will have to change.
2290 */
2291 while ((processor = processor->processor_list) != NULL) {
2292 stat = &PROCESSOR_DATA(processor, vm_stat);
2293 host_vm_stat.compressions += stat->compressions;
2294 host_vm_stat.decompressions += stat->decompressions;
2295 }
2296 }
2297
2298 memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
2299 memio_snap->free_pages = vm_page_free_count;
2300 memio_snap->active_pages = vm_page_active_count;
2301 memio_snap->inactive_pages = vm_page_inactive_count;
2302 memio_snap->purgeable_pages = vm_page_purgeable_count;
2303 memio_snap->wired_pages = vm_page_wire_count;
2304 memio_snap->speculative_pages = vm_page_speculative_count;
2305 memio_snap->throttled_pages = vm_page_throttled_count;
2306 memio_snap->busy_buffer_count = count_busy_buffers();
2307 memio_snap->filebacked_pages = vm_page_pageable_external_count;
2308 memio_snap->compressions = (uint32_t)host_vm_stat.compressions;
2309 memio_snap->decompressions = (uint32_t)host_vm_stat.decompressions;
2310 memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT;
2311 kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);
2312
2313 if (!kErr) {
2314 memio_snap->pages_wanted = (uint32_t)pages_wanted;
2315 memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
2316 memio_snap->pages_wanted_reclaimed_valid = 1;
2317 } else {
2318 memio_snap->pages_wanted = 0;
2319 memio_snap->pages_reclaimed = 0;
2320 memio_snap->pages_wanted_reclaimed_valid = 0;
2321 }
2322 }
2323
/*
 * Copy `len` bytes from `src` to `dst`.
 *
 * On CONFIG_EMBEDDED builds a panic-time stackshot copies byte by byte with
 * an open-coded loop; every other case defers to memcpy().
 */
void
stackshot_memcpy(void *dst, const void *src, size_t len)
{
#if CONFIG_EMBEDDED
	if (panic_stackshot) {
		uint8_t *out = (uint8_t *)dst;
		const uint8_t *in = (const uint8_t *)src;

		while (len-- > 0) {
			*out++ = *in++;
		}
		return;
	}
#endif
	memcpy(dst, src, len);
}
2338
/*
 * strlcpy()-style bounded string copy built on stackshot_memcpy().
 *
 * Copies at most maxlen - 1 bytes of `src` into `dst` and NUL-terminates the
 * result whenever maxlen is non-zero; with maxlen == 0 `dst` is not touched.
 *
 * Returns strlen(src), so a return value >= maxlen means the copy was truncated.
 */
size_t
stackshot_strlcpy(char *dst, const char *src, size_t maxlen)
{
	const size_t srclen = strlen(src);

	if (maxlen == 0) {
		/* No room for even the terminator. */
		return srclen;
	}

	if (srclen >= maxlen) {
		/* Truncate and terminate by hand. */
		stackshot_memcpy(dst, src, maxlen - 1);
		dst[maxlen - 1] = '\0';
	} else {
		/* Whole string fits, terminator included. */
		stackshot_memcpy(dst, src, srclen + 1);
	}

	return srclen;
}
2353
2354
2355 /*
2356 * Returns the physical address of the specified map:target address,
2357 * using the kdp fault path if requested and the page is not resident.
2358 */
2359 vm_offset_t
2360 kdp_find_phys(vm_map_t map, vm_offset_t target_addr, boolean_t try_fault, uint32_t *kdp_fault_results)
2361 {
2362 vm_offset_t cur_phys_addr;
2363 unsigned cur_wimg_bits;
2364 uint64_t fault_start_time = 0;
2365
2366 if (map == VM_MAP_NULL) {
2367 return 0;
2368 }
2369
2370 cur_phys_addr = kdp_vtophys(map->pmap, target_addr);
2371 if (!pmap_valid_page((ppnum_t) atop(cur_phys_addr))) {
2372 if (!try_fault || fault_stats.sfs_stopped_faulting) {
2373 if (kdp_fault_results) {
2374 *kdp_fault_results |= KDP_FAULT_RESULT_PAGED_OUT;
2375 }
2376
2377 return 0;
2378 }
2379
2380 /*
2381 * The pmap doesn't have a valid page so we start at the top level
2382 * vm map and try a lightweight fault. Update fault path usage stats.
2383 */
2384 fault_start_time = mach_absolute_time();
2385 cur_phys_addr = kdp_lightweight_fault(map, (target_addr & ~PAGE_MASK));
2386 fault_stats.sfs_time_spent_faulting += (mach_absolute_time() - fault_start_time);
2387
2388 if ((fault_stats.sfs_time_spent_faulting >= fault_stats.sfs_system_max_fault_time) && !panic_stackshot) {
2389 fault_stats.sfs_stopped_faulting = (uint8_t) TRUE;
2390 }
2391
2392 cur_phys_addr += (target_addr & PAGE_MASK);
2393
2394 if (!pmap_valid_page((ppnum_t) atop(cur_phys_addr))) {
2395 if (kdp_fault_results) {
2396 *kdp_fault_results |= (KDP_FAULT_RESULT_TRIED_FAULT | KDP_FAULT_RESULT_PAGED_OUT);
2397 }
2398
2399 return 0;
2400 }
2401
2402 if (kdp_fault_results) {
2403 *kdp_fault_results |= KDP_FAULT_RESULT_FAULTED_IN;
2404 }
2405
2406 fault_stats.sfs_pages_faulted_in++;
2407 } else {
2408 /*
2409 * This check is done in kdp_lightweight_fault for the fault path.
2410 */
2411 cur_wimg_bits = pmap_cache_attributes((ppnum_t) atop(cur_phys_addr));
2412
2413 if ((cur_wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) {
2414 return 0;
2415 }
2416 }
2417
2418 return cur_phys_addr;
2419 }
2420
2421 boolean_t
2422 kdp_copyin_word(
2423 task_t task, uint64_t addr, uint64_t *result, boolean_t try_fault, uint32_t *kdp_fault_results)
2424 {
2425 if (task_has_64Bit_addr(task)) {
2426 return kdp_copyin(task->map, addr, result, sizeof(uint64_t), try_fault, kdp_fault_results);
2427 } else {
2428 uint32_t buf;
2429 boolean_t r = kdp_copyin(task->map, addr, &buf, sizeof(uint32_t), try_fault, kdp_fault_results);
2430 *result = buf;
2431 return r;
2432 }
2433 }
2434
2435 int
2436 kdp_copyin_string(
2437 task_t task, uint64_t addr, char *buf, int buf_sz, boolean_t try_fault, uint32_t *kdp_fault_results)
2438 {
2439 int i;
2440 uint64_t validated = 0, valid_from;
2441 uint64_t phys_src, phys_dest;
2442
2443 for (i = 0; i < buf_sz; i++) {
2444 if (validated == 0) {
2445 valid_from = i;
2446 phys_src = kdp_find_phys(task->map, addr + i, try_fault, kdp_fault_results);
2447 phys_dest = kvtophys((vm_offset_t)&buf[i]);
2448 uint64_t src_rem = PAGE_SIZE - (phys_src & PAGE_MASK);
2449 uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK);
2450 if (phys_src && phys_dest) {
2451 validated = MIN(src_rem, dst_rem);
2452 if (validated) {
2453 bcopy_phys(phys_src, phys_dest, 1);
2454 validated--;
2455 } else {
2456 return 0;
2457 }
2458 } else {
2459 return 0;
2460 }
2461 } else {
2462 bcopy_phys(phys_src + (i - valid_from), phys_dest + (i - valid_from), 1);
2463 validated--;
2464 }
2465
2466 if (buf[i] == '\0') {
2467 return i + 1;
2468 }
2469 }
2470
2471 /* ran out of space */
2472 return -1;
2473 }
2474
2475 boolean_t
2476 kdp_copyin(vm_map_t map, uint64_t uaddr, void *dest, size_t size, boolean_t try_fault, uint32_t *kdp_fault_results)
2477 {
2478 size_t rem = size;
2479 char *kvaddr = dest;
2480
2481 #if CONFIG_EMBEDDED
2482 /* Identify if destination buffer is in panic storage area */
2483 if (panic_stackshot && ((vm_offset_t)dest >= gPanicBase) && ((vm_offset_t)dest < (gPanicBase + gPanicSize))) {
2484 if (((vm_offset_t)dest + size) > (gPanicBase + gPanicSize)) {
2485 return FALSE;
2486 }
2487 }
2488 #endif
2489
2490 while (rem) {
2491 uint64_t phys_src = kdp_find_phys(map, uaddr, try_fault, kdp_fault_results);
2492 uint64_t phys_dest = kvtophys((vm_offset_t)kvaddr);
2493 uint64_t src_rem = PAGE_SIZE - (phys_src & PAGE_MASK);
2494 uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK);
2495 size_t cur_size = (uint32_t) MIN(src_rem, dst_rem);
2496 cur_size = MIN(cur_size, rem);
2497
2498 if (phys_src && phys_dest) {
2499 #if CONFIG_EMBEDDED
2500 /*
2501 * On embedded the panic buffer is mapped as device memory and doesn't allow
2502 * unaligned accesses. To prevent these, we copy over bytes individually here.
2503 */
2504 if (panic_stackshot) {
2505 stackshot_memcpy(kvaddr, (const void *)phystokv(phys_src), cur_size);
2506 } else
2507 #endif /* CONFIG_EMBEDDED */
2508 bcopy_phys(phys_src, phys_dest, cur_size);
2509 } else {
2510 break;
2511 }
2512
2513 uaddr += cur_size;
2514 kvaddr += cur_size;
2515 rem -= cur_size;
2516 }
2517
2518 return rem == 0;
2519 }
2520
2521 kern_return_t
2522 do_stackshot(void *context)
2523 {
2524 #pragma unused(context)
2525 kdp_snapshot++;
2526
2527 stack_snapshot_ret = kdp_stackshot_kcdata_format(stack_snapshot_pid,
2528 stack_snapshot_flags,
2529 &stack_snapshot_bytes_traced);
2530
2531 kdp_snapshot--;
2532 return stack_snapshot_ret;
2533 }
2534
2535 /*
2536 * A fantastical routine that tries to be fast about returning
2537 * translations. Caches the last page we found a translation
2538 * for, so that we can be quick about multiple queries to the
2539 * same page. It turns out this is exactly the workflow
2540 * machine_trace_thread and its relatives tend to throw at us.
2541 *
2542 * Please zero the nasty global this uses after a bulk lookup;
2543 * this isn't safe across a switch of the map or changes
2544 * to a pmap.
2545 *
2546 * This also means that if zero is a valid KVA, we are
2547 * screwed. Sucks to be us. Fortunately, this should never
2548 * happen.
2549 */
2550 vm_offset_t
2551 machine_trace_thread_get_kva(vm_offset_t cur_target_addr, vm_map_t map, uint32_t *thread_trace_flags)
2552 {
2553 vm_offset_t cur_target_page;
2554 vm_offset_t cur_phys_addr;
2555 vm_offset_t kern_virt_target_addr;
2556 uint32_t kdp_fault_results = 0;
2557
2558 cur_target_page = atop(cur_target_addr);
2559
2560 if ((cur_target_page != prev_target_page) || validate_next_addr) {
2561 /*
2562 * Alright; it wasn't our previous page. So
2563 * we must validate that there is a page
2564 * table entry for this address under the
2565 * current pmap, and that it has default
2566 * cache attributes (otherwise it may not be
2567 * safe to access it).
2568 */
2569 cur_phys_addr = kdp_find_phys(map, cur_target_addr, stack_enable_faulting, &kdp_fault_results);
2570 if (thread_trace_flags) {
2571 if (kdp_fault_results & KDP_FAULT_RESULT_PAGED_OUT) {
2572 *thread_trace_flags |= kThreadTruncatedBT;
2573 }
2574
2575 if (kdp_fault_results & KDP_FAULT_RESULT_TRIED_FAULT) {
2576 *thread_trace_flags |= kThreadTriedFaultBT;
2577 }
2578
2579 if (kdp_fault_results & KDP_FAULT_RESULT_FAULTED_IN) {
2580 *thread_trace_flags |= kThreadFaultedBT;
2581 }
2582 }
2583
2584 if (cur_phys_addr == 0) {
2585 return 0;
2586 }
2587 kern_virt_target_addr = phystokv(cur_phys_addr);
2588 prev_target_page = cur_target_page;
2589 prev_target_kva = (kern_virt_target_addr & ~PAGE_MASK);
2590 validate_next_addr = FALSE;
2591 } else {
2592 /* We found a translation, so stash this page */
2593 kern_virt_target_addr = prev_target_kva + (cur_target_addr & PAGE_MASK);
2594 }
2595
2596 #if KASAN
2597 kasan_notify_address(kern_virt_target_addr, sizeof(uint64_t));
2598 #endif
2599 return kern_virt_target_addr;
2600 }
2601
2602 void
2603 machine_trace_thread_clear_validation_cache(void)
2604 {
2605 validate_next_addr = TRUE;
2606 }
2607
2608 boolean_t
2609 stackshot_thread_is_idle_worker_unsafe(thread_t thread)
2610 {
2611 /* When the pthread kext puts a worker thread to sleep, it will
2612 * set kThreadWaitParkedWorkQueue in the block_hint of the thread
2613 * struct. See parkit() in kern/kern_support.c in libpthread.
2614 */
2615 return (thread->state & TH_WAIT) &&
2616 (thread->block_hint == kThreadWaitParkedWorkQueue);
2617 }
2618
2619 #if CONFIG_COALITIONS
2620 static void
2621 stackshot_coalition_jetsam_count(void *arg, int i, coalition_t coal)
2622 {
2623 #pragma unused(i, coal)
2624 unsigned int *coalition_count = (unsigned int*)arg;
2625 (*coalition_count)++;
2626 }
2627
2628 static void
2629 stackshot_coalition_jetsam_snapshot(void *arg, int i, coalition_t coal)
2630 {
2631 if (coalition_type(coal) != COALITION_TYPE_JETSAM) {
2632 return;
2633 }
2634
2635 struct jetsam_coalition_snapshot *coalitions = (struct jetsam_coalition_snapshot*)arg;
2636 struct jetsam_coalition_snapshot *jcs = &coalitions[i];
2637 task_t leader = TASK_NULL;
2638 jcs->jcs_id = coalition_id(coal);
2639 jcs->jcs_flags = 0;
2640 jcs->jcs_thread_group = 0;
2641
2642 if (coalition_term_requested(coal)) {
2643 jcs->jcs_flags |= kCoalitionTermRequested;
2644 }
2645 if (coalition_is_terminated(coal)) {
2646 jcs->jcs_flags |= kCoalitionTerminated;
2647 }
2648 if (coalition_is_reaped(coal)) {
2649 jcs->jcs_flags |= kCoalitionReaped;
2650 }
2651 if (coalition_is_privileged(coal)) {
2652 jcs->jcs_flags |= kCoalitionPrivileged;
2653 }
2654
2655
2656 leader = kdp_coalition_get_leader(coal);
2657 if (leader) {
2658 jcs->jcs_leader_task_uniqueid = get_task_uniqueid(leader);
2659 } else {
2660 jcs->jcs_leader_task_uniqueid = 0;
2661 }
2662 }
2663 #endif /* CONFIG_COALITIONS */
2664
2665
2666 /* Determine if a thread has waitinfo that stackshot can provide */
2667 static int
2668 stackshot_thread_has_valid_waitinfo(thread_t thread)
2669 {
2670 if (!(thread->state & TH_WAIT)) {
2671 return 0;
2672 }
2673
2674 switch (thread->block_hint) {
2675 // If set to None or is a parked work queue, ignore it
2676 case kThreadWaitParkedWorkQueue:
2677 case kThreadWaitNone:
2678 return 0;
2679 // There is a short window where the pthread kext removes a thread
2680 // from its ksyn wait queue before waking the thread up
2681 case kThreadWaitPThreadMutex:
2682 case kThreadWaitPThreadRWLockRead:
2683 case kThreadWaitPThreadRWLockWrite:
2684 case kThreadWaitPThreadCondVar:
2685 return kdp_pthread_get_thread_kwq(thread) != NULL;
2686 // All other cases are valid block hints if in a wait state
2687 default:
2688 return 1;
2689 }
2690 }
2691
2692 /* Determine if a thread has turnstileinfo that stackshot can provide */
2693 static int
2694 stackshot_thread_has_valid_turnstileinfo(thread_t thread)
2695 {
2696 struct turnstile *ts = thread_get_waiting_turnstile(thread);
2697
2698 return stackshot_thread_has_valid_waitinfo(thread) &&
2699 ts != TURNSTILE_NULL;
2700 }
2701
2702 static void
2703 stackshot_thread_turnstileinfo(thread_t thread, thread_turnstileinfo_t *tsinfo)
2704 {
2705 struct turnstile *ts;
2706
2707 /* acquire turnstile information and store it in the stackshot */
2708 ts = thread_get_waiting_turnstile(thread);
2709 tsinfo->waiter = thread_tid(thread);
2710 kdp_turnstile_fill_tsinfo(ts, tsinfo);
2711 }
2712
2713 static void
2714 stackshot_thread_wait_owner_info(thread_t thread, thread_waitinfo_t *waitinfo)
2715 {
2716 waitinfo->waiter = thread_tid(thread);
2717 waitinfo->wait_type = thread->block_hint;
2718
2719 switch (waitinfo->wait_type) {
2720 case kThreadWaitKernelMutex:
2721 kdp_lck_mtx_find_owner(thread->waitq, thread->wait_event, waitinfo);
2722 break;
2723 case kThreadWaitPortReceive:
2724 kdp_mqueue_recv_find_owner(thread->waitq, thread->wait_event, waitinfo);
2725 break;
2726 case kThreadWaitPortSend:
2727 kdp_mqueue_send_find_owner(thread->waitq, thread->wait_event, waitinfo);
2728 break;
2729 case kThreadWaitSemaphore:
2730 kdp_sema_find_owner(thread->waitq, thread->wait_event, waitinfo);
2731 break;
2732 case kThreadWaitUserLock:
2733 kdp_ulock_find_owner(thread->waitq, thread->wait_event, waitinfo);
2734 break;
2735 case kThreadWaitKernelRWLockRead:
2736 case kThreadWaitKernelRWLockWrite:
2737 case kThreadWaitKernelRWLockUpgrade:
2738 kdp_rwlck_find_owner(thread->waitq, thread->wait_event, waitinfo);
2739 break;
2740 case kThreadWaitPThreadMutex:
2741 case kThreadWaitPThreadRWLockRead:
2742 case kThreadWaitPThreadRWLockWrite:
2743 case kThreadWaitPThreadCondVar:
2744 kdp_pthread_find_owner(thread, waitinfo);
2745 break;
2746 case kThreadWaitWorkloopSyncWait:
2747 kdp_workloop_sync_wait_find_owner(thread, thread->wait_event, waitinfo);
2748 break;
2749 case kThreadWaitOnProcess:
2750 kdp_wait4_find_process(thread, thread->wait_event, waitinfo);
2751 break;
2752 case kThreadWaitSleepWithInheritor:
2753 kdp_sleep_with_inheritor_find_owner(thread->waitq, thread->wait_event, waitinfo);
2754 break;
2755 default:
2756 waitinfo->owner = 0;
2757 waitinfo->context = 0;
2758 break;
2759 }
2760 }