]> git.saurik.com Git - apple/xnu.git/blame - tests/stackshot_accuracy.m
xnu-7195.81.3.tar.gz
[apple/xnu.git] / tests / stackshot_accuracy.m
CommitLineData
4ba76501
A
1#include <darwintest.h>
2#include <darwintest_utils.h>
3#include <sys/kern_memorystatus.h>
4#include <kern/debug.h>
5#include <mach-o/dyld.h>
6#include <sys/stackshot.h>
7#include <kdd.h>
8#include <signal.h>
9
/* number of nested child_recurse() calls requested by every scenario */
#define RECURSIONS 25
/*
 * check_stackshot() compares the "lr" of user frames whose index lies in
 * [FIRST_RECURSIVE_FRAME, RECURSIONS - FIRST_RECURSIVE_FRAME)
 */
#define FIRST_RECURSIVE_FRAME 3

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.stackshot.accuracy"),
	T_META_CHECK_LEAKS(false),
	T_META_ASROOT(true)
	);

/* option bits understood by check_stackshot() in its flags argument */
#define CHECK_FOR_FAULT_STATS (1 << 0)
#define WRITE_STACKSHOT_BUFFER_TO_TMP (1 << 1)
#define CHECK_FOR_KERNEL_THREADS (1 << 2)

/* forward declarations for functions defined further down */
int check_stackshot(void *, int);
void parent_helper_singleproc(int);
void child_init(void);

/* pid filled in by dt_launch_tool() and signalled by kill_children() */
static pid_t child_pid;
/* used for WRITE_STACKSHOT_BUFFER_TO_TMP */
static char const *current_scenario_name;
31
32/* helpers */
33
/*
 * helpers
 *
 * child_recurse: call itself until `r` reaches zero, then (at each level, once
 * the deeper call has returned) invoke `cb` and block according to `spin`:
 *   0       sleep(100000)
 *   2       close stderr/stdout and write 1 to the kern.wedge_thread sysctl
 *   other   busy-loop forever
 *
 * NOTE(review): noinline is presumably there so that every level shows up as
 * its own frame in the stackshot -- confirm.
 */
static void __attribute__((noinline))
child_recurse(int r, int spin, void (^cb)(void))
{
	if (r > 0) {
		child_recurse(r - 1, spin, cb);
	}

	cb();

	/* wait forever */
	switch (spin) {
	case 0:
		sleep(100000);
		break;
	case 2: {
		int wedge = 1;
		/* ssh won't let the session die if we still have file handles open to its output. */
		close(STDERR_FILENO);
		close(STDOUT_FILENO);
		T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.wedge_thread", NULL, NULL, &wedge, sizeof(wedge)),
		    "wedged thread in the kernel");
		break;
	}
	default:
		for (;;) {
			/* empty asm with a memory clobber keeps the loop from being optimised away */
			__asm__ volatile("" : : : "memory");
		}
	}
}
59
/* helper entry point launched by the multi-process tests: just runs child_init() */
T_HELPER_DECL(simple_child_process, "child process that will be frozen and others")
{
	child_init();
}
64
/*
 * Helper entry point used by the zombie test: starts a new session, recurses
 * RECURSIONS levels, signals the parent with SIGUSR1 and then wedges itself in
 * the kernel (spin mode 2 of child_recurse()).
 */
T_HELPER_DECL(sid_child_process, "child process that setsid()s")
{
	/* captured up front so the block below signals the original parent */
	pid_t ppid = getppid();

	T_ASSERT_POSIX_SUCCESS(setsid(), "session id set");

	child_recurse(RECURSIONS, 2, ^{
		kill(ppid, SIGUSR1);
	});

	/* this helper calls child_recurse(), not child_init(); report the right function */
	T_ASSERT_FAIL("child_recurse returned, but it must not?");
}
77
/* cleanup hook registered with T_ATEND: sends SIGKILL to the pid stored in child_pid */
static void
kill_children(void)
{
	(void)kill(child_pid, SIGKILL);
}
83
/*
 * Capture a kcdata stackshot of `target_pid`.
 *
 * extra_flags      additional STACKSHOT_* bits OR'd into the base flags
 *                  (KCDATA_FORMAT | THREAD_WAITINFO | GET_DQ)
 * since_timestamp  when non-zero, a delta stackshot relative to this
 *                  timestamp is requested
 *
 * Returns the stackshot config holding the captured buffer. Nothing in this
 * function releases the config; it is handed to the caller as-is.
 *
 * EBUSY and ETIMEDOUT are retried; the capture is attempted at most five
 * times. Any other error, or running out of attempts, fails the test.
 */
static void *
take_stackshot(pid_t target_pid, uint64_t extra_flags, uint64_t since_timestamp)
{
	void *stackshot_config;
	int err, retries = 5;
	uint64_t stackshot_flags = STACKSHOT_KCDATA_FORMAT |
	    STACKSHOT_THREAD_WAITINFO |
	    STACKSHOT_GET_DQ;

	/* we should be able to verify delta stackshots */
	if (since_timestamp != 0) {
		stackshot_flags |= STACKSHOT_COLLECT_DELTA_SNAPSHOT;
	}

	stackshot_flags |= extra_flags;

	stackshot_config = stackshot_config_create();
	T_ASSERT_NOTNULL(stackshot_config, "allocate stackshot config");

	err = stackshot_config_set_flags(stackshot_config, stackshot_flags);
	T_ASSERT_EQ(err, 0, "set flags on stackshot config");

	err = stackshot_config_set_pid(stackshot_config, target_pid);
	T_ASSERT_EQ(err, 0, "set target pid on stackshot config");

	if (since_timestamp != 0) {
		err = stackshot_config_set_delta_timestamp(stackshot_config, since_timestamp);
		T_ASSERT_EQ(err, 0, "set prev snapshot time on stackshot config");
	}

	for (;;) {
		err = stackshot_capture_with_config(stackshot_config);
		if (err == 0) {
			break;
		}

		if (err != EBUSY && err != ETIMEDOUT) {
			T_ASSERT_FAIL("failed to take stackshot with error: %d: %s\n", err, strerror(err));
		}

		T_LOG("stackshot capture returned %d (%s)\n", err, strerror(err));

		/*
		 * Consume one attempt *before* testing for exhaustion, so that the
		 * last transient failure is reported instead of silently returning
		 * a config that holds no stackshot.
		 */
		if (--retries == 0) {
			T_ASSERT_FAIL("failed to take stackshot with error after retries: %d: %s\n", err, strerror(err));
		}
	}

	return stackshot_config;
}
133
/*
 * Validate the stackshot stored in `stackshot_config`.
 *
 * `flags` is a mask of:
 *   CHECK_FOR_FAULT_STATS          a STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS item
 *                                  must be present; its contents are logged and
 *                                  sanity-checked
 *   WRITE_STACKSHOT_BUFFER_TO_TMP  additionally dump the raw buffer to a result
 *                                  file named "<current_scenario_name>.kcdata"
 *   CHECK_FOR_KERNEL_THREADS       only require every thread to carry
 *                                  "kernel_stack_frames" (user frames are not
 *                                  inspected)
 *
 * Without CHECK_FOR_KERNEL_THREADS, every thread with at least RECURSIONS user
 * frames must show one and the same "lr" for the frames whose index lies in
 * [FIRST_RECURSIVE_FRAME, RECURSIONS - FIRST_RECURSIVE_FRAME).
 *
 * Always returns 0; every failure is reported through a T_ASSERT_* macro.
 */
int
check_stackshot(void *stackshot_config, int flags)
{
	void *buf;
	uint32_t buflen, kcdata_type;
	kcdata_iter_t iter;
	NSError *nserror = nil;
	pid_t target_pid;
	int ret = 0;
	uint64_t expected_return_addr = 0;
	bool found_fault_stats = false;
	struct stackshot_fault_stats fault_stats = {0};

	buf = stackshot_config_get_stackshot_buffer(stackshot_config);
	T_ASSERT_NOTNULL(buf, "stackshot buffer is not null");
	buflen = stackshot_config_get_stackshot_size(stackshot_config);
	T_ASSERT_GT(buflen, 0, "valid stackshot buffer length");
	target_pid = ((struct stackshot_config*)stackshot_config)->sc_pid;
	T_ASSERT_GT(target_pid, 0, "valid target_pid");

	/* if need to write it to fs, do it now */
	if (flags & WRITE_STACKSHOT_BUFFER_TO_TMP) {
		char sspath[MAXPATHLEN];
		strlcpy(sspath, current_scenario_name, sizeof(sspath));
		strlcat(sspath, ".kcdata", sizeof(sspath));
		T_QUIET; T_ASSERT_POSIX_ZERO(dt_resultfile(sspath, sizeof(sspath)),
		    "create result file path");

		FILE *f = fopen(sspath, "w");
		T_WITH_ERRNO; T_QUIET; T_ASSERT_NOTNULL(f,
		    "open stackshot output file");

		/*
		 * fwrite() returns the number of items written (0 on failure, never
		 * -1), so compare against the single item we asked for.
		 */
		size_t written = fwrite(buf, buflen, 1, f);
		T_QUIET; T_ASSERT_EQ((unsigned long long)written, 1ull, "wrote stackshot to file");

		fclose(f);
	}

	/* begin iterating */
	iter = kcdata_iter(buf, buflen);
	T_ASSERT_EQ(kcdata_iter_type(iter), KCDATA_BUFFER_BEGIN_STACKSHOT, "buffer is a stackshot");

	/* time to iterate */
	iter = kcdata_iter_next(iter);
	KCDATA_ITER_FOREACH(iter) {
		kcdata_type = kcdata_iter_type(iter);
		NSNumber *parsedPid;
		NSMutableDictionary *parsedContainer, *parsedThreads;

		/* remember the fault statistics item if the caller asked for it */
		if ((flags & CHECK_FOR_FAULT_STATS) != 0 &&
		    kcdata_type == STACKSHOT_KCTYPE_STACKSHOT_FAULT_STATS) {
			memcpy(&fault_stats, kcdata_iter_payload(iter), sizeof(fault_stats));
			found_fault_stats = true;
		}

		/* only task containers are parsed further */
		if (kcdata_type != KCDATA_TYPE_CONTAINER_BEGIN) {
			continue;
		}

		if (kcdata_iter_container_type(iter) != STACKSHOT_KCCONTAINER_TASK) {
			continue;
		}

		parsedContainer = parseKCDataContainer(&iter, &nserror);
		T_ASSERT_NOTNULL(parsedContainer, "parsedContainer is not null");
		T_ASSERT_NULL(nserror, "no NSError occured while parsing the kcdata container");

		/*
		 * given that we've targeted the pid, we can be sure that this
		 * ts_pid will be the pid we expect
		 */
		parsedPid = parsedContainer[@"task_snapshots"][@"task_snapshot"][@"ts_pid"];
		T_ASSERT_EQ([parsedPid intValue], target_pid, "found correct pid");

		/* start parsing the threads */
		parsedThreads = parsedContainer[@"task_snapshots"][@"thread_snapshots"];
		for (id th_key in parsedThreads) {
			uint32_t frame_index = 0;

			if ((flags & CHECK_FOR_KERNEL_THREADS) == 0) {
				/* skip threads that don't have enough frames */
				if ([parsedThreads[th_key][@"user_stack_frames"] count] < RECURSIONS) {
					continue;
				}

				for (id frame in parsedThreads[th_key][@"user_stack_frames"]) {
					if ((frame_index >= FIRST_RECURSIVE_FRAME) && (frame_index < (RECURSIONS - FIRST_RECURSIVE_FRAME))) {
						if (expected_return_addr == 0ull) {
							/* first frame in the window defines the expected value */
							expected_return_addr = [frame[@"lr"] unsignedLongLongValue];
						} else {
							T_QUIET;
							T_ASSERT_EQ(expected_return_addr, [frame[@"lr"] unsignedLongLongValue], "expected return address found");
						}
					}
					frame_index++;
				}
			} else {
				T_ASSERT_NOTNULL(parsedThreads[th_key][@"kernel_stack_frames"],
				    "found kernel stack frames");
			}
		}
	}

	if (found_fault_stats) {
		T_LOG("number of pages faulted in: %d", fault_stats.sfs_pages_faulted_in);
		T_LOG("MATUs spent faulting: %lld", fault_stats.sfs_time_spent_faulting);
		T_LOG("MATUS fault time limit: %lld", fault_stats.sfs_system_max_fault_time);
		T_LOG("did we stop because of the limit?: %s", fault_stats.sfs_stopped_faulting ? "yes" : "no");
		if (expected_return_addr != 0ull) {
			T_ASSERT_GT(fault_stats.sfs_pages_faulted_in, 0, "faulted at least one page in");
			T_LOG("NOTE: successfully faulted in the pages");
		} else {
			T_LOG("NOTE: We were not able to fault the stack's pages back in");

			/* if we couldn't fault the pages back in, then at least verify that we tried */
			T_ASSERT_GT(fault_stats.sfs_time_spent_faulting, 0ull, "spent time trying to fault");
		}
	} else if ((flags & CHECK_FOR_KERNEL_THREADS) == 0) {
		T_ASSERT_NE(expected_return_addr, 0ull, "found child thread with recursions");
	}

	if (flags & CHECK_FOR_FAULT_STATS) {
		T_ASSERT_EQ(found_fault_stats, true, "found fault stats");
	}

	return ret;
}
262
/*
 * Body of the simple_child_process helper. On non-macOS targets it first makes
 * sure the process is marked freezable, then recurses RECURSIONS levels,
 * notifies the parent with SIGUSR1 and sleeps (spin mode 0 of child_recurse()).
 */
void
child_init(void)
{
	pid_t self = getpid();
	/*
	 * NOTE(review): the 16KB array presumably just enlarges this frame; the
	 * empty asm takes it as an input so it stays referenced -- confirm intent.
	 */
	char padding[16 * 1024];
	__asm__ volatile(""::"r"(padding));

	T_LOG("child pid: %d\n", self);

#if TARGET_OS_OSX
	T_LOG("Cannot change freezeability as freezing is only available on embedded devices");
#else /* !TARGET_OS_OSX */
	/* allow us to be frozen */
	int freeze_state = memorystatus_control(MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE, self, 0, NULL, 0);
	if (freeze_state == 0) {
		T_LOG("CHILD was found to be UNFREEZABLE, enabling freezing.");
		memorystatus_control(MEMORYSTATUS_CMD_SET_PROCESS_IS_FREEZABLE, self, 1, NULL, 0);
		/* read the state back to confirm the change took effect */
		freeze_state = memorystatus_control(MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE, self, 0, NULL, 0);
		T_ASSERT_EQ(freeze_state, 1, "successfully set freezeability");
	}
#endif /* TARGET_OS_OSX */

	/*
	 * build a predictable, deeply recursive stack for the stackshot, and
	 * tell the parent (SIGUSR1) once it is in place.
	 */
	child_recurse(RECURSIONS, 0, ^{
		kill(getppid(), SIGUSR1);
	});

	T_ASSERT_FAIL("child_recurse returned, but it must not?");
}
298
/*
 * Single-process scenario: run child_recurse() on a private dispatch queue in
 * this process, wait until it reports in, then stackshot our own pid and
 * verify the recursive frames. `spin` is forwarded to child_recurse().
 */
void
parent_helper_singleproc(int spin)
{
	dispatch_queue_t worker_q = dispatch_queue_create("com.apple.stackshot_accuracy.basic_sp", NULL);
	dispatch_semaphore_t ready = dispatch_semaphore_create(0);

	dispatch_async(worker_q, ^{
		char padding[16 * 1024];
		__asm__ volatile(""::"r"(padding));

		child_recurse(RECURSIONS, spin, ^{
			dispatch_semaphore_signal(ready);
		});
	});

	/* block until the worker has finished recursing */
	dispatch_semaphore_wait(ready, DISPATCH_TIME_FOREVER);
	T_LOG("done waiting for child");

	/* capture this process and validate the user stack frames */
	void *stackshot_config = take_stackshot(getpid(), 0, 0);
	check_stackshot(stackshot_config, 0);

	T_LOG("done!");
}
326
/*
 * Launch the simple_child_process helper, wait for its SIGUSR1, stackshot it
 * without faulting enabled and verify the recursive user frames.
 */
T_DECL(basic, "test that no-fault stackshot works correctly")
{
	char path[PATH_MAX];
	uint32_t path_size = sizeof(path);
	char *args[] = { path, "-n", "simple_child_process", NULL };
	dispatch_queue_t dq = dispatch_queue_create("com.apple.stackshot_accuracy.basic", NULL);
	dispatch_semaphore_t child_done_sema = dispatch_semaphore_create(0);
	dispatch_source_t child_sig_src;
	void *stackshot_config;

	current_scenario_name = __func__;

	T_LOG("parent pid: %d\n", getpid());
	T_QUIET; T_ASSERT_POSIX_ZERO(_NSGetExecutablePath(path, &path_size), "_NSGetExecutablePath");

	/* check if we can run the child successfully */
#if !TARGET_OS_OSX
	int freeze_state = memorystatus_control(MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE, getpid(), 0, NULL, 0);
	if (freeze_state == -1) {
		T_SKIP("This device doesn't have CONFIG_FREEZE enabled.");
	}
#endif

	/* setup signal handling: SIGUSR1 is ignored and observed through a dispatch source instead */
	signal(SIGUSR1, SIG_IGN);
	child_sig_src = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dq);
	dispatch_source_set_event_handler(child_sig_src, ^{
		dispatch_semaphore_signal(child_done_sema);
	});
	dispatch_activate(child_sig_src);

	/* create the child process */
	T_ASSERT_POSIX_SUCCESS(dt_launch_tool(&child_pid, args, false, NULL, NULL), "child launched");
	T_ATEND(kill_children);

	/*
	 * wait until the child has recursed enough. A non-zero return means the
	 * 10 second timeout expired; fail here rather than stackshotting a child
	 * that never reported in.
	 */
	if (dispatch_semaphore_wait(child_done_sema, dispatch_time(DISPATCH_TIME_NOW, 10 /*seconds*/ * 1000000000ULL)) != 0) {
		T_ASSERT_FAIL("timed out waiting for the child to send SIGUSR1");
	}

	T_LOG("child finished, parent executing");

	/* take the stackshot and parse it */
	stackshot_config = take_stackshot(child_pid, 0, 0);

	/* check that the stackshot has the stack frames */
	check_stackshot(stackshot_config, 0);

	T_LOG("all done, killing child");

	/* tell the child to quit */
	T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGTERM), "killed child");
}
378
/* single-process variant; the worker thread sleeps (spin mode 0) */
T_DECL(basic_singleproc, "test that no-fault stackshot works correctly in single process setting")
{
	current_scenario_name = __func__;

	parent_helper_singleproc(0);
}
384
/* single-process variant; the worker thread busy-loops (spin mode 1) */
T_DECL(basic_singleproc_spin, "test that no-fault stackshot works correctly in single process setting with spinning")
{
	current_scenario_name = __func__;

	parent_helper_singleproc(1);
}
390
/*
 * Launch the simple_child_process helper, freeze it, then take a stackshot
 * with backtrace/UUID faulting enabled and verify that fault statistics are
 * reported. Skipped on macOS and on devices where freezing is unavailable or
 * disabled.
 */
T_DECL(fault, "test that faulting stackshots work correctly")
{
	dispatch_queue_t dq = dispatch_queue_create("com.apple.stackshot_fault_accuracy", NULL);
	dispatch_source_t child_sig_src;
	dispatch_semaphore_t child_done_sema = dispatch_semaphore_create(0);
	void *stackshot_config;
	int oldftm, newval = 1, freeze_enabled = 0, oldratio, newratio = 0;
	size_t oldlen = sizeof(oldftm), fe_len = sizeof(freeze_enabled), ratiolen = sizeof(oldratio);
	char path[PATH_MAX];
	uint32_t path_size = sizeof(path);
	char *args[] = { path, "-n", "simple_child_process", NULL };

	current_scenario_name = __func__;
	T_QUIET; T_ASSERT_POSIX_ZERO(_NSGetExecutablePath(path, &path_size), "_NSGetExecutablePath");

#if TARGET_OS_OSX
	T_SKIP("freezing is not available on macOS");
#endif /* TARGET_OS_OSX */

	/* Try checking if freezing is enabled at all */
	if (sysctlbyname("vm.freeze_enabled", &freeze_enabled, &fe_len, NULL, 0) == -1) {
		if (errno == ENOENT) {
			T_SKIP("This device doesn't have CONFIG_FREEZE enabled.");
		} else {
			/*
			 * abort here: carrying on would act on a freeze_enabled
			 * value the kernel never filled in
			 */
			T_ASSERT_FAIL("failed to query vm.freeze_enabled, errno: %d", errno);
		}
	}

	if (!freeze_enabled) {
		T_SKIP("Freeze is not enabled, skipping test.");
	}

	/* signal handling: SIGUSR1 is ignored and observed through a dispatch source instead */
	signal(SIGUSR1, SIG_IGN);
	child_sig_src = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dq);
	dispatch_source_set_event_handler(child_sig_src, ^{
		dispatch_semaphore_signal(child_done_sema);
	});
	dispatch_activate(child_sig_src);

	T_ASSERT_POSIX_SUCCESS(dt_launch_tool(&child_pid, args, false, NULL, NULL), "child launched");
	T_ATEND(kill_children);

	/* wait until the child has recursed enough */
	dispatch_semaphore_wait(child_done_sema, DISPATCH_TIME_FOREVER);

	/* keep processes in memory (previous value is saved in oldftm) */
	T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.memorystatus_freeze_to_memory", &oldftm, &oldlen, &newval, sizeof(newval)),
	    "disabled freezing to disk");

	/* set the ratio to zero (previous value is saved in oldratio) */
	T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.memorystatus_freeze_private_shared_pages_ratio", &oldratio, &ratiolen, &newratio, sizeof(newratio)), "disabled private:shared ratio checking");

	/* freeze the child */
	T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.memorystatus_freeze", NULL, 0, &child_pid, sizeof(child_pid)),
	    "froze child");

	/* Sleep to allow the compressor to finish compressing the child */
	sleep(5);

	/* take the stackshot and parse it */
	stackshot_config = take_stackshot(child_pid, STACKSHOT_ENABLE_BT_FAULTING | STACKSHOT_ENABLE_UUID_FAULTING, 0);

	/* check that the stackshot has the stack frames */
	check_stackshot(stackshot_config, CHECK_FOR_FAULT_STATS);

	/* restore the freeze-to-memory setting */
	T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.memorystatus_freeze_to_memory", NULL, 0, &oldftm, sizeof(oldftm)),
	    "reset freezing to disk");

	/* reset the private:shared ratio */
	T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.memorystatus_freeze_private_shared_pages_ratio", NULL, 0, &oldratio, sizeof(oldratio)), "reset private:shared ratio");

	T_LOG("all done, killing child");

	/* tell the child to quit */
	T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGTERM), "killed child");
}
467
/*
 * Single-process faulting scenario (macOS only): recurse on a worker thread,
 * page that thread's stack out with madvise(MADV_PAGEOUT), then take a
 * stackshot of this process with backtrace/UUID faulting enabled and verify
 * that fault statistics are reported.
 */
T_DECL(fault_singleproc, "test that faulting stackshots work correctly in a single process setting")
{
	dispatch_semaphore_t child_done_sema = dispatch_semaphore_create(0);
	dispatch_queue_t dq = dispatch_queue_create("com.apple.stackshot_accuracy.fault_sp", NULL);
	void *stackshot_config;
	__block pthread_t child_thread;
	char *child_stack;
	size_t child_stacklen;

	/* record the scenario name like every other test in this file does */
	current_scenario_name = __func__;

#if !TARGET_OS_OSX
	T_SKIP("madvise(..., ..., MADV_PAGEOUT) is not available on embedded platforms");
#endif /* !TARGET_OS_OSX */

	dispatch_async(dq, ^{
		char padding[16 * 1024];
		__asm__ volatile(""::"r"(padding));

		child_recurse(RECURSIONS, 0, ^{
			/* publish the worker's pthread handle before waking the parent */
			child_thread = pthread_self();
			dispatch_semaphore_signal(child_done_sema);
		});
	});

	dispatch_semaphore_wait(child_done_sema, DISPATCH_TIME_FOREVER);
	T_LOG("done waiting for child");

	/* the size is subtracted from the reported stack address to get the start of the range */
	child_stack = pthread_get_stackaddr_np(child_thread);
	child_stacklen = pthread_get_stacksize_np(child_thread);
	child_stack -= child_stacklen;
	/* %p already carries its own prefix and %zu is decimal: no literal "0x" */
	T_LOG("child stack: [%p - %p]: %zu bytes", (void *)child_stack,
	    (void *)(child_stack + child_stacklen), child_stacklen);

	/* paging out the child */
	T_ASSERT_POSIX_SUCCESS(madvise(child_stack, child_stacklen, MADV_PAGEOUT), "paged out via madvise(2) the child stack");

	/* take the stackshot and parse it */
	stackshot_config = take_stackshot(getpid(), STACKSHOT_ENABLE_BT_FAULTING | STACKSHOT_ENABLE_UUID_FAULTING, 0);

	/* check that the stackshot has the stack frames */
	check_stackshot(stackshot_config, CHECK_FOR_FAULT_STATS);

	T_LOG("done!");
}
511
/*
 * Launch the sid_child_process helper (which wedges a thread in the kernel),
 * jetsam it, stackshot the remaining process and verify that its threads have
 * kernel stack frames; afterwards unwedge the thread and terminate the child.
 */
T_DECL(zombie, "test that threads wedged in the kernel can be stackshot'd")
{
	char path[PATH_MAX];
	uint32_t path_size = sizeof(path);
	char *args[] = { path, "-n", "sid_child_process", NULL };
	dispatch_semaphore_t child_ready = dispatch_semaphore_create(0);
	dispatch_queue_t sig_q = dispatch_queue_create("com.apple.stackshot_accuracy.zombie", NULL);

	current_scenario_name = __func__;
	T_QUIET; T_ASSERT_POSIX_ZERO(_NSGetExecutablePath(path, &path_size), "_NSGetExecutablePath");

	T_LOG("parent pid: %d\n", getpid());

	/* SIGUSR1 is ignored and observed through a dispatch source instead */
	signal(SIGUSR1, SIG_IGN);
	dispatch_source_t sig_src = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, sig_q);
	dispatch_source_set_event_handler(sig_src, ^{
		dispatch_semaphore_signal(child_ready);
	});
	dispatch_activate(sig_src);

	/* spawn the helper and make sure it is killed when the test ends */
	T_ASSERT_POSIX_SUCCESS(dt_launch_tool(&child_pid, args, false, NULL, NULL), "child launched");
	T_ATEND(kill_children);

	/* block until the helper has recursed and signalled us */
	dispatch_semaphore_wait(child_ready, DISPATCH_TIME_FOREVER);

	T_LOG("child finished, parent executing. invoking jetsam");

	T_ASSERT_POSIX_SUCCESS(memorystatus_control(MEMORYSTATUS_CMD_TEST_JETSAM, child_pid, 0, 0, 0),
	    "jetsam'd the child");

	/* give the target process time to become a zombie */
	sleep(1);

	/* capture the child and require kernel stack frames on its threads */
	void *stackshot_config = take_stackshot(child_pid, 0, 0);
	check_stackshot(stackshot_config, CHECK_FOR_KERNEL_THREADS);

	T_LOG("all done, unwedging and killing child");

	int unwedge = 1;
	T_ASSERT_POSIX_SUCCESS(sysctlbyname("kern.unwedge_thread", NULL, NULL, &unwedge, sizeof(unwedge)),
	    "unwedged child");

	/* ask the child to exit */
	T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGTERM), "killed child");
}