]> git.saurik.com Git - apple/xnu.git/blob - tests/memorystatus_vm_map_fork.c
65f29b569fa7e7e392abe45c606b7188aa61bca5
[apple/xnu.git] / tests / memorystatus_vm_map_fork.c
1 #include <stdio.h>
2 #include <unistd.h>
3 #include <stdlib.h>
4 #include <errno.h>
5 #include <string.h>
6 #include <assert.h>
7 #include <signal.h>
8 #include <spawn.h>
9 #include <spawn_private.h>
10 #include <stdint.h>
11 #include <sys/sysctl.h>
12 #include <sys/spawn_internal.h>
13 #include <sys/kern_memorystatus.h>
14 #include <mach-o/dyld.h>
15
16 #include <darwintest.h>
17 #include <darwintest_utils.h>
18
19 #include "test_utils.h"
20
21 T_GLOBAL_META(
22 T_META_NAMESPACE("xnu.vm"),
23 T_META_CHECK_LEAKS(false)
24 );
25
/* Environment of this process; passed unchanged to posix_spawn() for the child. */
extern char **environ;
27
/*
 * This test file contains two sub-tests which attempt to verify
 * the allowing or not allowing of a corpse for crashreporter when
 * a task exceeds its memory allocation limit. vm_map_fork() is the
 * kernel routine used to generate a corpse task.
 *
 * A corpse is allowed to be taken if a task's memory resource limit that
 * is exceeded is less than 1/4 of the system wide task limit.
 * If the amount exceeds 1/4 the system wide limit, then the corpse is disallowed.
 *
 * If the device under test is already under pressure, the test
 * could fail due to jetsam cutting in and killing the parent, child or
 * other necessary testing processes.
 */
42
/*
 * Test variants: selects which scenario memorystatus_vm_map_fork_parent()
 * sets up for the child.
 */
#define TEST_ALLOWED            0x1     /* child limit below 1/4 of the system wide limit */
#define TEST_NOT_ALLOWED        0x2     /* child limit above 1/4 of the system wide limit */
46
/*
 * Result bits for the kern.memorystatus_vm_map_fork_pidwatch sysctl.
 * The kernel OR's one of these values into the watched PID to report
 * whether a corpse was allowed or disallowed.
 */
#define MEMORYSTATUS_VM_MAP_FORK_ALLOWED        0x100000000ul
#define MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED    0x200000000ul
54
55 /*
56 * The memory allocation happens in a child process, this
57 * is stuff to deal with creating and managing the child.
58 * The child will only execute the T_HELPER_DECL.
59 */
60 static char testpath[PATH_MAX];
61 static uint32_t testpath_size = sizeof(testpath);
62 #define LIMIT_DELTA_MB 5 /* an arbitrary limit delta */
63 #define MEGABYTE (1024 * 1024)
64
/*
 * The child process communicates back to parent via an exit() code.
 * NUM_CHILD_EXIT is the number of real codes, not a code itself, and
 * must remain the last enumerator.
 */
enum child_exits {
	NORMAL_EXIT = 0,
	NO_MEMSIZE_ARG,
	INVALID_MEMSIZE,
	MALLOC_FAILED,
	NUM_CHILD_EXIT
};

/*
 * Human-readable reason for each child exit code, indexed by
 * enum child_exits. The table is read-only, is sized by NUM_CHILD_EXIT and
 * uses designated initializers so each string stays attached to its code
 * even if the enum is reordered.
 */
static const char *const child_exit_why[NUM_CHILD_EXIT] = {
	[NORMAL_EXIT]     = "normal exit",
	[NO_MEMSIZE_ARG]  = "no memsize argument to child",
	[INVALID_MEMSIZE] = "invalid memsize argument to child",
	[MALLOC_FAILED]   = "malloc() failed",
};
81
82 /*
83 * Set/Get the sysctl used to determine if corpse collection occurs.
84 * This is done by the kernel checking for a specific PID.
85 */
86 static void
87 set_memorystatus_vm_map_fork_pidwatch(pid_t pid)
88 {
89 uint64_t new_value = (uint64_t)pid;
90 size_t new_len = sizeof(new_value);
91 int err;
92
93 err = sysctlbyname("kern.memorystatus_vm_map_fork_pidwatch", NULL, NULL, &new_value, new_len);
94 T_QUIET;
95 T_ASSERT_POSIX_SUCCESS(err, "set sysctlbyname(kern.memorystatus_vm_map_fork_pidwatch...) failed");
96 return;
97 }
98
99 static uint64_t
100 get_memorystatus_vm_map_fork_pidwatch()
101 {
102 uint64_t value = 0;
103 size_t val_len = sizeof(value);
104 int err;
105
106 err = sysctlbyname("kern.memorystatus_vm_map_fork_pidwatch", &value, &val_len, NULL, 0);
107 T_QUIET;
108 T_ASSERT_POSIX_SUCCESS(err, "get sysctlbyname(kern.memorystatus_vm_map_fork_pidwatch...) failed");
109
110 return value;
111 }
112
113 /*
114 * We want to avoid jetsam giving us bad results, if possible. So check if there's
115 * enough memory for the test to run, waiting briefly for some to free up.
116 */
117 static void
118 wait_for_free_mem(int need_mb)
119 {
120 int64_t memsize;
121 int memorystatus_level;
122 size_t size;
123 int64_t avail;
124 int err;
125 int try;
126
127 /*
128 * get amount of memory in the machine
129 */
130 size = sizeof(memsize);
131 err = sysctlbyname("hw.memsize", &memsize, &size, NULL, 0);
132 T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "sysctlbyname(hw.memsize...) failed");
133
134 /*
135 * Use a loop to briefly sleep and recheck if short on memory.
136 */
137 try = 1;
138 for (;;) {
139 /*
140 * memorystatus_level is a percentage of memory available. For example 20 means 1/5 of memory.
141 * It currently doesn't exist on macOS but neither does jetsam, so pass the test there.
142 */
143 size = sizeof(memorystatus_level);
144 if (sysctlbyname("kern.memorystatus_level", &memorystatus_level, &size, NULL, 0) != 0) {
145 return;
146 }
147 T_QUIET; T_ASSERT_LE(memorystatus_level, 100, "memorystatus_level too high");
148 T_QUIET; T_ASSERT_GT(memorystatus_level, 0, "memorystatus_level negative");
149
150 /*
151 * jetsam kicks in at memory status level of 15%, so subtract that much out of what's available.
152 */
153 avail = MAX(0, (memsize * (memorystatus_level - 15)) / 100);
154
155 /*
156 * We're good to go if there's more than enough available.
157 */
158 if ((int64_t)need_mb * MEGABYTE < avail) {
159 return;
160 }
161
162 /*
163 * issue a message to log and sleep briefly to see if we can get more memory
164 */
165 if (try-- == 0) {
166 break;
167 }
168 T_LOG("Need %d MB, only %d MB available. sleeping 5 seconds for more to free. memorystatus_level %d",
169 need_mb, (int)(avail / MEGABYTE), memorystatus_level);
170 sleep(5);
171 }
172 T_SKIP("Needed %d MB, but only %d MB available. Skipping test to avoid jetsam issues.",
173 need_mb, (int)(avail / MEGABYTE));
174 }
175
176
177 /*
178 * The main test calls this to spawn child process which will run and
179 * exceed some memory limit. The child is initially suspended so that
180 * we can do the sysctl calls before it runs.
181 * Since this is a libdarwintest, the "-n" names the T_HELPER_DECL() that
182 * we want to run. The arguments specific to the test follow a "--".
183 */
184 static pid_t
185 spawn_child_process(
186 char * const executable,
187 char * const memlimit,
188 short flags,
189 int priority,
190 int active_limit_mb,
191 int inactive_limit_mb)
192 {
193 posix_spawnattr_t spawn_attrs;
194 int err;
195 pid_t child_pid;
196 char * const argv_child[] = { executable, "-n", "child_process", "--", memlimit, NULL };
197
198 err = posix_spawnattr_init(&spawn_attrs);
199 T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " posix_spawnattr_init() failed");
200
201 err = posix_spawnattr_setflags(&spawn_attrs, POSIX_SPAWN_START_SUSPENDED);
202 T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " posix_spawnattr_setflags() failed");
203
204 err = posix_spawnattr_setjetsam_ext(&spawn_attrs, flags, priority, active_limit_mb, inactive_limit_mb);
205 T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " posix_spawnattr_setjetsam_ext() failed");
206
207 err = posix_spawn(&child_pid, executable, NULL, &spawn_attrs, argv_child, environ);
208 T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " posix_spawn() failed");
209
210 return child_pid;
211 }
212
213
214 /*
215 * The parent calls this to continue the suspended child, then wait for its result.
216 * We collect its resource usage to vefiry the expected amount allocated.
217 */
218 static void
219 test_child_process(pid_t child_pid, int *status, struct rusage *ru)
220 {
221 int err = 0;
222 pid_t got_pid;
223
224 T_LOG(" continuing child[%d]\n", child_pid);
225
226 err = kill(child_pid, SIGCONT);
227 T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " kill(%d, SIGCONT) failed", child_pid);
228
229 T_LOG(" waiting for child[%d] to exit", child_pid);
230
231 got_pid = wait4(child_pid, status, 0, ru);
232 T_QUIET; T_ASSERT_EQ(child_pid, got_pid, " wait4(%d, ...) returned %d", child_pid, got_pid);
233 }
234
235 /*
236 * The child process executes this code. The easiest way, with given darwintest infrastructure,
237 * it has to return information is via exit status.
238 */
239 T_HELPER_DECL(child_process, "child allocates memory to failure")
240 {
241 #define BYTESPERALLOC MEGABYTE
242 #define BYTESINEXCESS (2 * MEGABYTE) /* 2 MB - arbitrary */
243 char *limit;
244 long limit_mb = 0;
245 long max_bytes_to_munch, bytes_remaining, bytes_this_munch;
246 void *mem = NULL;
247
248 /*
249 * This helper is run in a child process. The helper sees one argument
250 * as a string which is the amount of memory in megabytes to allocate.
251 */
252 if (argc != 1) {
253 exit(NO_MEMSIZE_ARG);
254 }
255
256 limit = argv[0];
257 errno = 0;
258 limit_mb = strtol(limit, NULL, 10);
259 if (errno != 0 || limit_mb <= 0) {
260 exit(INVALID_MEMSIZE);
261 }
262
263 /* Compute in excess of assigned limit */
264 max_bytes_to_munch = limit_mb * MEGABYTE;
265 max_bytes_to_munch += BYTESINEXCESS;
266
267 for (bytes_remaining = max_bytes_to_munch; bytes_remaining > 0; bytes_remaining -= bytes_this_munch) {
268 bytes_this_munch = MIN(bytes_remaining, BYTESPERALLOC);
269
270 mem = malloc((size_t)bytes_this_munch);
271 if (mem == NULL) {
272 exit(MALLOC_FAILED);
273 }
274 arc4random_buf(mem, (size_t)bytes_this_munch);
275 }
276
277 /* We chewed up all the memory we were asked to. */
278 exit(NORMAL_EXIT);
279 }
280
281
282 /*
283 * Actual test body.
284 */
285 static void
286 memorystatus_vm_map_fork_parent(int test_variant)
287 {
288 int max_task_pmem = 0; /* MB */
289 size_t size = 0;
290 int active_limit_mb = 0;
291 int inactive_limit_mb = 0;
292 short flags = 0;
293 char memlimit_str[16];
294 pid_t child_pid;
295 int child_status;
296 uint64_t kernel_pidwatch_val;
297 uint64_t expected_pidwatch_val;
298 int ret;
299 struct rusage ru;
300 enum child_exits exit_val;
301
302 /*
303 * The code to set/get the pidwatch sysctl is only in
304 * development kernels. Skip the test if not on one.
305 */
306 if (!is_development_kernel()) {
307 T_SKIP("Can't test on release kernel");
308 }
309
310 /*
311 * Determine a memory limit based on system having one or not.
312 */
313 size = sizeof(max_task_pmem);
314 (void)sysctlbyname("kern.max_task_pmem", &max_task_pmem, &size, NULL, 0);
315 if (max_task_pmem <= 0) {
316 max_task_pmem = 0;
317 }
318
319 if (test_variant == TEST_ALLOWED) {
320 /*
321 * Tell the child to allocate less than 1/4 the system wide limit.
322 */
323 if (max_task_pmem / 4 - LIMIT_DELTA_MB <= 0) {
324 active_limit_mb = LIMIT_DELTA_MB;
325 } else {
326 active_limit_mb = max_task_pmem / 4 - LIMIT_DELTA_MB;
327 }
328 expected_pidwatch_val = MEMORYSTATUS_VM_MAP_FORK_ALLOWED;
329 } else { /* TEST_NOT_ALLOWED */
330 /*
331 * Tell the child to allocate more than 1/4 the system wide limit.
332 */
333 active_limit_mb = (max_task_pmem / 4) + LIMIT_DELTA_MB;
334 if (max_task_pmem == 0) {
335 expected_pidwatch_val = MEMORYSTATUS_VM_MAP_FORK_ALLOWED;
336 } else {
337 expected_pidwatch_val = MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED;
338 }
339 }
340 inactive_limit_mb = active_limit_mb;
341 T_LOG("using limit of %d Meg", active_limit_mb);
342
343 /*
344 * When run as part of a larger suite, a previous test
345 * may have left the system temporarily with too little
346 * memory to run this test. We try to detect if there is
347 * enough free memory to proceed, waiting a little bit
348 * for memory to free up.
349 */
350 wait_for_free_mem(active_limit_mb);
351
352 #if TARGET_OS_OSX
353 /*
354 * vm_map_fork() is always allowed on desktop.
355 */
356 expected_pidwatch_val = MEMORYSTATUS_VM_MAP_FORK_ALLOWED;
357 #endif
358
359 /*
360 * Prepare the arguments needed to spawn the child process.
361 */
362 memset(memlimit_str, 0, sizeof(memlimit_str));
363 (void)sprintf(memlimit_str, "%d", active_limit_mb);
364
365 ret = _NSGetExecutablePath(testpath, &testpath_size);
366 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "_NSGetExecutablePath(%s, ...)", testpath);
367
368 /*
369 * We put the child process in FOREGROUND to try and keep jetsam's hands off it.
370 */
371 child_pid = spawn_child_process(testpath, memlimit_str, flags,
372 JETSAM_PRIORITY_FOREGROUND, active_limit_mb, inactive_limit_mb);
373
374 expected_pidwatch_val |= (uint64_t)child_pid;
375
376 /*
377 * We only reach here if parent successfully spawned child process.
378 */
379 T_LOG(" spawned child_pid[%d] with memlimit %s (%d)MB\n",
380 child_pid, memlimit_str, active_limit_mb);
381
382 /*
383 * Set the kernel's pidwatch to look for the child.
384 */
385 (void)set_memorystatus_vm_map_fork_pidwatch((pid_t)0);
386 (void)set_memorystatus_vm_map_fork_pidwatch(child_pid);
387
388 /*
389 * Let the child run and wait for it to finish.
390 */
391 test_child_process(child_pid, &child_status, &ru);
392 T_LOG("Child exited with max_rss of %ld", ru.ru_maxrss);
393
394 /*
395 * Retrieve the kernel's pidwatch value. This should now indicate
396 * if the corpse was allowed or not.
397 */
398 kernel_pidwatch_val = get_memorystatus_vm_map_fork_pidwatch();
399 (void)set_memorystatus_vm_map_fork_pidwatch((pid_t)0);
400
401 /*
402 * If the child died abnormally, the test is invalid.
403 */
404 if (!WIFEXITED(child_status)) {
405 if (WIFSIGNALED(child_status)) {
406 /* jetsam kills a process with SIGKILL */
407 if (WTERMSIG(child_status) == SIGKILL) {
408 T_LOG("Child appears to have been a jetsam victim");
409 }
410 T_SKIP("Child terminated by signal %d test result invalid", WTERMSIG(child_status));
411 }
412 T_SKIP("child did not exit normally (status=%d) test result invalid", child_status);
413 }
414
415 /*
416 * We don't expect the child to exit for any other reason than success
417 */
418 exit_val = (enum child_exits)WEXITSTATUS(child_status);
419 T_QUIET; T_ASSERT_EQ(exit_val, NORMAL_EXIT, "child exit due to: %s",
420 (0 < exit_val && exit_val < NUM_CHILD_EXIT) ? child_exit_why[exit_val] : "unknown");
421
422 /*
423 * If the kernel aborted generating a corpse for other reasons, the test is invalid.
424 */
425 if (kernel_pidwatch_val == -1ull) {
426 T_SKIP("corpse generation was aborted by kernel");
427 }
428
429 /*
430 * We should always have made it through the vm_map_fork() checks in the kernel for this test.
431 */
432 T_QUIET; T_ASSERT_NE_ULLONG(kernel_pidwatch_val, (uint64_t)child_pid, "child didn't trigger corpse generation");
433
434 T_EXPECT_EQ(kernel_pidwatch_val, expected_pidwatch_val, "kernel value 0x%llx - expected 0x%llx",
435 kernel_pidwatch_val, expected_pidwatch_val);
436 }
437
/*
 * The order of these 2 test functions is important. They will be executed by the test framework in order.
 *
 * We test "not allowed" first, then "allowed". If it were the other way around, the corpse from the "allowed"
 * test would likely cause memory pressure and jetsam would likely kill the "not allowed" test.
 */
443 */
444 T_DECL(memorystatus_vm_map_fork_test_not_allowed, "test that corpse generation was not allowed", T_META_ASROOT(true))
445 {
446 memorystatus_vm_map_fork_parent(TEST_NOT_ALLOWED);
447 }
448
449 T_DECL(memorystatus_vm_map_fork_test_allowed, "test corpse generation allowed", T_META_ASROOT(true))
450 {
451 memorystatus_vm_map_fork_parent(TEST_ALLOWED);
452 }