]>
Commit | Line | Data |
---|---|---|
1 | #include <stdio.h> | |
2 | #include <unistd.h> | |
3 | #include <stdlib.h> | |
4 | #include <errno.h> | |
5 | #include <string.h> | |
6 | #include <assert.h> | |
7 | #include <signal.h> | |
8 | #include <spawn.h> | |
9 | #include <spawn_private.h> | |
10 | #include <stdint.h> | |
11 | #include <sys/sysctl.h> | |
12 | #include <sys/spawn_internal.h> | |
13 | #include <sys/kern_memorystatus.h> | |
14 | #include <mach-o/dyld.h> | |
15 | ||
16 | #include <darwintest.h> | |
17 | #include <darwintest_utils.h> | |
18 | ||
19 | T_GLOBAL_META( | |
20 | T_META_NAMESPACE("xnu.vm"), | |
21 | T_META_CHECK_LEAKS(false) | |
22 | ); | |
23 | ||
24 | extern char **environ; | |
25 | ||
26 | /* | |
27 | * This test file contains two sub-tests which attempt to verify | |
28 | * the allowing or not allowing of a corpse for crashreporter when | |
29 | * a task exceeds its memory allocation limit. vm_map_fork() is the | |
30 | * kernel routine used to generate a corpse task. | |
31 | * | |
32 | * A corpse is allowed to be taken if a task's memory resource limit that | |
33 | * is exceeded is less than 1/4 of the system wide task limit. | |
34 | * If the amount exceeds 1/4 the system wide limit, then the corpse is disallowed. | |
35 | * | |
36 | * If the device under test is already under pressure, the test | |
37 | * could fail due to jetsam cutting in and killing the parent, child or | |
38 | * other necessary testing processes. | |
39 | */ | |
40 | ||
/*
 * Test variants, passed to memorystatus_vm_map_fork_parent() to select
 * whether corpse generation is expected to be allowed or refused.
 */
enum {
	TEST_ALLOWED     = 0x1,
	TEST_NOT_ALLOWED = 0x2
};
44 | ||
/*
 * Flag bits reported through the kern.memorystatus_vm_map_fork_pidwatch
 * sysctl: the kernel OR's one of these into the watched PID to say
 * whether a corpse was allowed or disallowed. Both sit above the low
 * 32 bits, which hold the PID itself.
 */
#define MEMORYSTATUS_VM_MAP_FORK_ALLOWED        UINT64_C(0x100000000)
#define MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED    UINT64_C(0x200000000)
52 | ||
/*
 * Support for the child process, which is where the memory allocation
 * actually happens. The child only executes the T_HELPER_DECL; the
 * parent spawns it from the path stored in testpath.
 */
#define MEGABYTE        (1024 * 1024)
#define LIMIT_DELTA_MB  5       /* an arbitrary limit delta */

static char testpath[PATH_MAX];                         /* path of the executable to spawn */
static uint32_t testpath_size = sizeof(testpath);       /* capacity of testpath, in bytes */
62 | ||
/*
 * The child process communicates back to parent via an exit() code.
 * NUM_CHILD_EXIT is the number of real codes, not a code itself, and
 * must therefore remain the last enumerator.
 */
enum child_exits {
	NORMAL_EXIT = 0,
	NO_MEMSIZE_ARG,
	INVALID_MEMSIZE,
	MALLOC_FAILED,
	NUM_CHILD_EXIT
};

/*
 * Human readable explanation for each exit code, indexed by
 * enum child_exits. The table is read-only, and the designated
 * initializers plus the explicit NUM_CHILD_EXIT bound keep it in step
 * with the enum if codes are added or reordered.
 */
static const char * const child_exit_why[NUM_CHILD_EXIT] = {
	[NORMAL_EXIT]     = "normal exit",
	[NO_MEMSIZE_ARG]  = "no memsize argument to child",
	[INVALID_MEMSIZE] = "invalid memsize argument to child",
	[MALLOC_FAILED]   = "malloc() failed",
};
79 | ||
/*
 * Corpse collection only happens in development kernels, so the test
 * needs a way to tell whether it is relevant on this system.
 *
 * Returns TRUE when the kern.development sysctl can be read and is
 * non-zero; FALSE when it is zero or the sysctl cannot be read.
 */
static boolean_t
is_development_kernel(void)
{
	int dev = 0;
	size_t dev_size = sizeof(dev);

	/* An unreadable sysctl is treated the same as "not development". */
	if (sysctlbyname("kern.development", &dev, &dev_size, NULL, 0) != 0) {
		return FALSE;
	}

	return dev != 0;
}
98 | ||
/*
 * Set the sysctl used to determine if corpse collection occurs.
 * The kernel does this by checking for a specific PID, so writing a
 * PID here selects the process to watch.
 *
 * pid: the process to watch (the caller passes 0 to reset the watch).
 *
 * Failure to write the sysctl is a (quiet) test assertion failure.
 */
static void
set_memorystatus_vm_map_fork_pidwatch(pid_t pid)
{
	/* The sysctl value is 64 bits wide; widen the PID before writing. */
	uint64_t watched = (uint64_t)pid;
	int err = sysctlbyname("kern.memorystatus_vm_map_fork_pidwatch", NULL, NULL, &watched, sizeof(watched));

	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(err, "set sysctlbyname(kern.memorystatus_vm_map_fork_pidwatch...) failed");
}
115 | ||
/*
 * Read back the kern.memorystatus_vm_map_fork_pidwatch sysctl.
 *
 * Returns the raw 64-bit value reported by the kernel. Callers compare
 * it against the watched PID OR'ed with one of the
 * MEMORYSTATUS_VM_MAP_FORK_* flags.
 *
 * Failure to read the sysctl is a (quiet) test assertion failure.
 */
static uint64_t
get_memorystatus_vm_map_fork_pidwatch(void)
{
	uint64_t value = 0;
	size_t val_len = sizeof(value);
	int err;

	err = sysctlbyname("kern.memorystatus_vm_map_fork_pidwatch", &value, &val_len, NULL, 0);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(err, "get sysctlbyname(kern.memorystatus_vm_map_fork_pidwatch...) failed");

	return value;
}
129 | ||
/*
 * Try to keep jetsam from spoiling the results: make sure there is
 * enough memory for the test to run, waiting briefly for some to free
 * up if there is not.
 *
 * need_mb: amount of memory, in megabytes, the test is about to use.
 *
 * Returns normally when enough memory is available, or when
 * kern.memorystatus_level cannot be read. Otherwise, after one
 * 5 second wait and a recheck, the test is skipped via T_SKIP.
 */
static void
wait_for_free_mem(int need_mb)
{
	int64_t memsize;
	int64_t avail;
	int memorystatus_level;
	size_t len;
	int err;
	int sleeps_left;

	/* Total amount of memory in the machine. */
	len = sizeof(memsize);
	err = sysctlbyname("hw.memsize", &memsize, &len, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "sysctlbyname(hw.memsize...) failed");

	/* Check, and if short on memory sleep once and check again. */
	for (sleeps_left = 1;; sleeps_left--) {
		/*
		 * memorystatus_level is a percentage of memory available. For example 20 means 1/5 of memory.
		 * It currently doesn't exist on macOS but neither does jetsam, so pass the test there.
		 */
		len = sizeof(memorystatus_level);
		if (sysctlbyname("kern.memorystatus_level", &memorystatus_level, &len, NULL, 0) != 0) {
			return;
		}
		T_QUIET; T_ASSERT_LE(memorystatus_level, 100, "memorystatus_level too high");
		T_QUIET; T_ASSERT_GT(memorystatus_level, 0, "memorystatus_level negative");

		/*
		 * jetsam kicks in at memory status level of 15%, so that share
		 * is not usable; clamp at zero when the level is below 15.
		 */
		avail = (memsize * (memorystatus_level - 15)) / 100;
		if (avail < 0) {
			avail = 0;
		}

		/* Strictly more than needed must be available to proceed. */
		if ((int64_t)need_mb * MEGABYTE < avail) {
			return;
		}

		/* Out of retries: fall through to skipping the test. */
		if (sleeps_left == 0) {
			break;
		}

		T_LOG("Need %d MB, only %d MB available. sleeping 5 seconds for more to free. memorystatus_level %d",
		    need_mb, (int)(avail / MEGABYTE), memorystatus_level);
		sleep(5);
	}

	T_SKIP("Needed %d MB, but only %d MB available. Skipping test to avoid jetsam issues.",
	    need_mb, (int)(avail / MEGABYTE));
}
190 | ||
191 | ||
192 | /* | |
193 | * The main test calls this to spawn child process which will run and | |
194 | * exceed some memory limit. The child is initially suspended so that | |
195 | * we can do the sysctl calls before it runs. | |
196 | * Since this is a libdarwintest, the "-n" names the T_HELPER_DECL() that | |
197 | * we want to run. The arguments specific to the test follow a "--". | |
198 | */ | |
199 | static pid_t | |
200 | spawn_child_process( | |
201 | char * const executable, | |
202 | char * const memlimit, | |
203 | short flags, | |
204 | int priority, | |
205 | int active_limit_mb, | |
206 | int inactive_limit_mb) | |
207 | { | |
208 | posix_spawnattr_t spawn_attrs; | |
209 | int err; | |
210 | pid_t child_pid; | |
211 | char * const argv_child[] = { executable, "-n", "child_process", "--", memlimit, NULL }; | |
212 | ||
213 | err = posix_spawnattr_init(&spawn_attrs); | |
214 | T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " posix_spawnattr_init() failed"); | |
215 | ||
216 | err = posix_spawnattr_setflags(&spawn_attrs, POSIX_SPAWN_START_SUSPENDED); | |
217 | T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " posix_spawnattr_setflags() failed"); | |
218 | ||
219 | err = posix_spawnattr_setjetsam_ext(&spawn_attrs, flags, priority, active_limit_mb, inactive_limit_mb); | |
220 | T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " posix_spawnattr_setjetsam_ext() failed"); | |
221 | ||
222 | err = posix_spawn(&child_pid, executable, NULL, &spawn_attrs, argv_child, environ); | |
223 | T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " posix_spawn() failed"); | |
224 | ||
225 | return child_pid; | |
226 | } | |
227 | ||
228 | ||
229 | /* | |
230 | * The parent calls this to continue the suspended child, then wait for its result. | |
231 | * We collect its resource usage to vefiry the expected amount allocated. | |
232 | */ | |
233 | static void | |
234 | test_child_process(pid_t child_pid, int *status, struct rusage *ru) | |
235 | { | |
236 | int err = 0; | |
237 | pid_t got_pid; | |
238 | ||
239 | T_LOG(" continuing child[%d]\n", child_pid); | |
240 | ||
241 | err = kill(child_pid, SIGCONT); | |
242 | T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " kill(%d, SIGCONT) failed", child_pid); | |
243 | ||
244 | T_LOG(" waiting for child[%d] to exit", child_pid); | |
245 | ||
246 | got_pid = wait4(child_pid, status, 0, ru); | |
247 | T_QUIET; T_ASSERT_EQ(child_pid, got_pid, " wait4(%d, ...) returned %d", child_pid, got_pid); | |
248 | } | |
249 | ||
250 | /* | |
251 | * The child process executes this code. The easiest way, with given darwintest infrastructure, | |
252 | * it has to return information is via exit status. | |
253 | */ | |
254 | T_HELPER_DECL(child_process, "child allocates memory to failure") | |
255 | { | |
256 | #define BYTESPERALLOC MEGABYTE | |
257 | #define BYTESINEXCESS (2 * MEGABYTE) /* 2 MB - arbitrary */ | |
258 | char *limit; | |
259 | long limit_mb = 0; | |
260 | long max_bytes_to_munch, bytes_remaining, bytes_this_munch; | |
261 | void *mem = NULL; | |
262 | ||
263 | /* | |
264 | * This helper is run in a child process. The helper sees one argument | |
265 | * as a string which is the amount of memory in megabytes to allocate. | |
266 | */ | |
267 | if (argc != 1) | |
268 | exit(NO_MEMSIZE_ARG); | |
269 | ||
270 | limit = argv[0]; | |
271 | errno = 0; | |
272 | limit_mb = strtol(limit, NULL, 10); | |
273 | if (errno != 0 || limit_mb <= 0) | |
274 | exit(INVALID_MEMSIZE); | |
275 | ||
276 | /* Compute in excess of assigned limit */ | |
277 | max_bytes_to_munch = limit_mb * MEGABYTE; | |
278 | max_bytes_to_munch += BYTESINEXCESS; | |
279 | ||
280 | for (bytes_remaining = max_bytes_to_munch; bytes_remaining > 0; bytes_remaining -= bytes_this_munch) { | |
281 | bytes_this_munch = MIN(bytes_remaining, BYTESPERALLOC); | |
282 | ||
283 | mem = malloc((size_t)bytes_this_munch); | |
284 | if (mem == NULL) | |
285 | exit(MALLOC_FAILED); | |
286 | arc4random_buf(mem, (size_t)bytes_this_munch); | |
287 | } | |
288 | ||
289 | /* We chewed up all the memory we were asked to. */ | |
290 | exit(NORMAL_EXIT); | |
291 | } | |
292 | ||
293 | ||
294 | /* | |
295 | * Actual test body. | |
296 | */ | |
297 | static void | |
298 | memorystatus_vm_map_fork_parent(int test_variant) | |
299 | { | |
300 | int max_task_pmem = 0; /* MB */ | |
301 | size_t size = 0; | |
302 | int active_limit_mb = 0; | |
303 | int inactive_limit_mb = 0; | |
304 | short flags = 0; | |
305 | char memlimit_str[16]; | |
306 | pid_t child_pid; | |
307 | int child_status; | |
308 | uint64_t kernel_pidwatch_val; | |
309 | uint64_t expected_pidwatch_val; | |
310 | int ret; | |
311 | struct rusage ru; | |
312 | enum child_exits exit_val; | |
313 | ||
314 | /* | |
315 | * The code to set/get the pidwatch sysctl is only in | |
316 | * development kernels. Skip the test if not on one. | |
317 | */ | |
318 | if (!is_development_kernel()) { | |
319 | T_SKIP("Can't test on release kernel"); | |
320 | } | |
321 | ||
322 | /* | |
323 | * Determine a memory limit based on system having one or not. | |
324 | */ | |
325 | size = sizeof(max_task_pmem); | |
326 | (void)sysctlbyname("kern.max_task_pmem", &max_task_pmem, &size, NULL, 0); | |
327 | if (max_task_pmem <= 0) | |
328 | max_task_pmem = 0; | |
329 | ||
330 | if (test_variant == TEST_ALLOWED) { | |
331 | ||
332 | /* | |
333 | * Tell the child to allocate less than 1/4 the system wide limit. | |
334 | */ | |
335 | if (max_task_pmem / 4 - LIMIT_DELTA_MB <= 0) { | |
336 | active_limit_mb = LIMIT_DELTA_MB; | |
337 | } else { | |
338 | active_limit_mb = max_task_pmem / 4 - LIMIT_DELTA_MB; | |
339 | } | |
340 | expected_pidwatch_val = MEMORYSTATUS_VM_MAP_FORK_ALLOWED; | |
341 | ||
342 | } else { /* TEST_NOT_ALLOWED */ | |
343 | ||
344 | /* | |
345 | * Tell the child to allocate more than 1/4 the system wide limit. | |
346 | */ | |
347 | active_limit_mb = (max_task_pmem / 4) + LIMIT_DELTA_MB; | |
348 | if (max_task_pmem == 0) { | |
349 | expected_pidwatch_val = MEMORYSTATUS_VM_MAP_FORK_ALLOWED; | |
350 | } else { | |
351 | expected_pidwatch_val = MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED; | |
352 | } | |
353 | ||
354 | } | |
355 | inactive_limit_mb = active_limit_mb; | |
356 | T_LOG("using limit of %d Meg", active_limit_mb); | |
357 | ||
358 | /* | |
359 | * When run as part of a larger suite, a previous test | |
360 | * may have left the system temporarily with too little | |
361 | * memory to run this test. We try to detect if there is | |
362 | * enough free memory to proceed, waiting a little bit | |
363 | * for memory to free up. | |
364 | */ | |
365 | wait_for_free_mem(active_limit_mb); | |
366 | ||
367 | #if defined(__x86_64__) | |
368 | /* | |
369 | * vm_map_fork() is always allowed on desktop. | |
370 | */ | |
371 | expected_pidwatch_val = MEMORYSTATUS_VM_MAP_FORK_ALLOWED; | |
372 | #endif | |
373 | ||
374 | /* | |
375 | * Prepare the arguments needed to spawn the child process. | |
376 | */ | |
377 | memset (memlimit_str, 0, sizeof(memlimit_str)); | |
378 | (void)sprintf(memlimit_str, "%d", active_limit_mb); | |
379 | ||
380 | ret = _NSGetExecutablePath(testpath, &testpath_size); | |
381 | T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "_NSGetExecutablePath(%s, ...)", testpath); | |
382 | ||
383 | /* | |
384 | * We put the child process in FOREGROUND to try and keep jetsam's hands off it. | |
385 | */ | |
386 | child_pid = spawn_child_process(testpath, memlimit_str, flags, | |
387 | JETSAM_PRIORITY_FOREGROUND, active_limit_mb, inactive_limit_mb); | |
388 | ||
389 | expected_pidwatch_val |= (uint64_t)child_pid; | |
390 | ||
391 | /* | |
392 | * We only reach here if parent successfully spawned child process. | |
393 | */ | |
394 | T_LOG(" spawned child_pid[%d] with memlimit %s (%d)MB\n", | |
395 | child_pid, memlimit_str, active_limit_mb); | |
396 | ||
397 | /* | |
398 | * Set the kernel's pidwatch to look for the child. | |
399 | */ | |
400 | (void)set_memorystatus_vm_map_fork_pidwatch((pid_t)0); | |
401 | (void)set_memorystatus_vm_map_fork_pidwatch(child_pid); | |
402 | ||
403 | /* | |
404 | * Let the child run and wait for it to finish. | |
405 | */ | |
406 | test_child_process(child_pid, &child_status, &ru); | |
407 | T_LOG("Child exited with max_rss of %ld", ru.ru_maxrss); | |
408 | ||
409 | /* | |
410 | * Retrieve the kernel's pidwatch value. This should now indicate | |
411 | * if the corpse was allowed or not. | |
412 | */ | |
413 | kernel_pidwatch_val = get_memorystatus_vm_map_fork_pidwatch(); | |
414 | (void)set_memorystatus_vm_map_fork_pidwatch((pid_t)0); | |
415 | ||
416 | /* | |
417 | * If the child died abnormally, the test is invalid. | |
418 | */ | |
419 | if (!WIFEXITED(child_status)) { | |
420 | if (WIFSIGNALED(child_status)) { | |
421 | /* jetsam kills a process with SIGKILL */ | |
422 | if (WTERMSIG(child_status) == SIGKILL) | |
423 | T_LOG("Child appears to have been a jetsam victim"); | |
424 | T_SKIP("Child terminated by signal %d test result invalid", WTERMSIG(child_status)); | |
425 | } | |
426 | T_SKIP("child did not exit normally (status=%d) test result invalid", child_status); | |
427 | } | |
428 | ||
429 | /* | |
430 | * We don't expect the child to exit for any other reason than success | |
431 | */ | |
432 | exit_val = (enum child_exits)WEXITSTATUS(child_status); | |
433 | T_QUIET; T_ASSERT_EQ(exit_val, NORMAL_EXIT, "child exit due to: %s", | |
434 | (0 < exit_val && exit_val < NUM_CHILD_EXIT) ? child_exit_why[exit_val] : "unknown"); | |
435 | ||
436 | /* | |
437 | * If the kernel aborted generating a corpse for other reasons, the test is invalid. | |
438 | */ | |
439 | if (kernel_pidwatch_val == -1ull) { | |
440 | T_SKIP("corpse generation was aborted by kernel"); | |
441 | } | |
442 | ||
443 | /* | |
444 | * We should always have made it through the vm_map_fork() checks in the kernel for this test. | |
445 | */ | |
446 | T_QUIET; T_ASSERT_NE_ULLONG(kernel_pidwatch_val, (uint64_t)child_pid, "child didn't trigger corpse generation"); | |
447 | ||
448 | T_EXPECT_EQ(kernel_pidwatch_val, expected_pidwatch_val, "kernel value 0x%llx - expected 0x%llx", | |
449 | kernel_pidwatch_val, expected_pidwatch_val); | |
450 | } | |
451 | ||
452 | /* | |
453 | * The order of these 2 test functions is important. They will be executed by the test framework in order. | |
454 | * | |
455 | * We test "not allowed first", then "allowed". If it were the other way around, the corpse from the "allowed" | |
456 | * test would likely cause memory pressure and jetsam would likely kill the "not allowed" test. | |
457 | */ | |
458 | T_DECL(memorystatus_vm_map_fork_test_not_allowed, "test that corpse generation was not allowed") | |
459 | { | |
460 | memorystatus_vm_map_fork_parent(TEST_NOT_ALLOWED); | |
461 | } | |
462 | ||
463 | T_DECL(memorystatus_vm_map_fork_test_allowed, "test corpse generation allowed") | |
464 | { | |
465 | ||
466 | memorystatus_vm_map_fork_parent(TEST_ALLOWED); | |
467 | } |