]> git.saurik.com Git - apple/xnu.git/blob - tools/tests/darwintests/memorystatus_zone_test.c
f652725bb30a9574773c659d56eb19342a22387f
[apple/xnu.git] / tools / tests / darwintests / memorystatus_zone_test.c
#include <stdio.h>
#include <pthread.h>
#include <mach/mach_vm.h>
#include <mach/mach_port.h>
#include <mach/mach_host.h>
#include <mach/mach_error.h>
#include <mach-o/dyld.h>
#include <sys/sysctl.h>
#include <sys/kdebug.h>
#include <sys/mman.h>
#include <sys/kern_memorystatus.h>
#include <ktrace/session.h>
#include <dispatch/private.h>
13
14 #ifdef T_NAMESPACE
15 #undef T_NAMESPACE
16 #endif
17 #include <darwintest.h>
18 #include <darwintest_utils.h>
19
20 T_GLOBAL_META(
21 T_META_NAMESPACE("xnu.vm"),
22 T_META_CHECK_LEAKS(false)
23 );
24
/* Seconds after which the dispatch_after block queued by run_test() fails the test */
#define TIMEOUT_SECS                            1500

/* Size of each allocation made by the helper processes */
#if TARGET_OS_EMBEDDED
#define ALLOCATION_SIZE_VM_REGION               (16*1024)               /* 16 KB */
#define ALLOCATION_SIZE_VM_OBJECT               ALLOCATION_SIZE_VM_REGION
#else
#define ALLOCATION_SIZE_VM_REGION               (1024*1024*100)         /* 100 MB */
#define ALLOCATION_SIZE_VM_OBJECT               (16*1024)               /* 16 KB */
#endif

/* Capacity of child_pids[]; spawn_child_process() asserts before exceeding it */
#define MAX_CHILD_PROCS                         100

/* Sysctl assignment passed to T_META_SYSCTL_INT by each test declaration */
#define ZONEMAP_JETSAM_LIMIT_SYSCTL             "kern.zone_map_jetsam_limit=60"

/* Helper names handed to the child process after "-n" */
#define VME_ZONE_TEST_OPT                       "allocate_vm_regions"
#define VM_OBJECTS_ZONE_TEST_OPT                "allocate_vm_objects"
#define GENERIC_ZONE_TEST_OPT                   "allocate_from_generic_zone"

/* Zone names compared against mzn_name */
#define VME_ZONE                                "VM map entries"
#define VMOBJECTS_ZONE                          "vm objects"

/* Percentage of the vm objects count the VM map entries count must reach to be "comparable" */
#define VMENTRY_TO_VMOBJECT_COMPARISON_RATIO    98

/* Two VM tags alternated between consecutive allocations to prevent coalescing */
#define VM_TAG1                                 100
#define VM_TAG2                                 101
48
/* Values stored in test_config_struct.test_index to identify the running test. */
enum {
	VME_ZONE_TEST = 0,      /* memorystatus_vme_zone_test */
	VM_OBJECTS_ZONE_TEST,   /* memorystatus_vm_objects_zone_test */
	GENERIC_ZONE_TEST,      /* memorystatus_generic_zone_test */
};
54
55 typedef struct test_config_struct {
56 int test_index;
57 int num_zones;
58 const char *helper_func;
59 mach_zone_name_array_t zone_names;
60 } test_config_struct;
61
62 static test_config_struct current_test;
63 static int num_children = 0;
64 static bool test_ending = false;
65 static bool within_dispatch_signal_handler = false;
66 static bool within_dispatch_timer_handler = false;
67 static dispatch_source_t ds_signal = NULL;
68 static dispatch_source_t ds_timer = NULL;
69 static ktrace_session_t session = NULL;
70
71 static mach_zone_info_array_t zone_info_array = NULL;
72 static mach_zone_name_t largest_zone_name;
73 static mach_zone_info_t largest_zone_info;
74
75 static char testpath[PATH_MAX];
76 static pid_t child_pids[MAX_CHILD_PROCS];
77 static pthread_mutex_t test_ending_mtx;
78
79 static void allocate_vm_regions(void);
80 static void allocate_vm_objects(void);
81 static void allocate_from_generic_zone(void);
82 static void cleanup_and_end_test(void);
83 static void setup_ktrace_session(void);
84 static void spawn_child_process(void);
85 static void run_test(void);
86 static bool verify_generic_jetsam_criteria(void);
87 static bool vme_zone_compares_to_vm_objects(void);
88 static void print_zone_map_size(void);
89 static void query_zone_info(void);
90 static void print_zone_info(mach_zone_name_t *zn, mach_zone_info_t *zi);
91
92 extern void mach_zone_force_gc(host_t host);
93 extern kern_return_t mach_zone_info_for_largest_zone(
94 host_priv_t host,
95 mach_zone_name_t *name,
96 mach_zone_info_t *info
97 );
98
99 static void allocate_vm_regions(void)
100 {
101 uint64_t alloc_size = ALLOCATION_SIZE_VM_REGION, i = 0;
102
103 printf("[%d] Allocating VM regions, each of size %lld KB\n", getpid(), (alloc_size>>10));
104 for (i = 0; ; i++) {
105 mach_vm_address_t addr = (mach_vm_address_t)NULL;
106
107 /* Alternate VM tags between consecutive regions to prevent coalescing */
108 int flags = VM_MAKE_TAG((i % 2)? VM_TAG1: VM_TAG2) | VM_FLAGS_ANYWHERE;
109
110 if ((mach_vm_allocate(mach_task_self(), &addr, (mach_vm_size_t)alloc_size, flags)) != KERN_SUCCESS) {
111 break;
112 }
113 }
114 printf("[%d] Number of allocations: %lld\n", getpid(), i);
115
116 /* Signal to the parent that we're done allocating */
117 kill(getppid(), SIGUSR1);
118
119 while (1) {
120 pause();
121 }
122 }
123
124 static void allocate_vm_objects(void)
125 {
126 uint64_t alloc_size = ALLOCATION_SIZE_VM_OBJECT, i = 0;
127
128 printf("[%d] Allocating VM regions, each of size %lld KB, each backed by a VM object\n", getpid(), (alloc_size>>10));
129 for (i = 0; ; i++) {
130 mach_vm_address_t addr = (mach_vm_address_t)NULL;
131
132 /* Alternate VM tags between consecutive regions to prevent coalescing */
133 int flags = VM_MAKE_TAG((i % 2)? VM_TAG1: VM_TAG2) | VM_FLAGS_ANYWHERE;
134
135 if ((mach_vm_allocate(mach_task_self(), &addr, (mach_vm_size_t)alloc_size, flags)) != KERN_SUCCESS) {
136 break;
137 }
138 /* Touch the region so the VM object can actually be created */
139 *((int *)addr) = 0;
140 /* OK to free this page. Keeps us from holding a lot of dirty pages */
141 madvise((void *)addr, (size_t)alloc_size, MADV_FREE);
142 }
143 printf("[%d] Number of allocations: %lld\n", getpid(), i);
144
145 /* Signal to the parent that we're done allocating */
146 kill(getppid(), SIGUSR1);
147
148 while (1) {
149 pause();
150 }
151 }
152
153 static void allocate_from_generic_zone(void)
154 {
155 uint64_t i = 0;
156
157 printf("[%d] Allocating mach_ports\n", getpid());
158 for (i = 0; ; i++) {
159 mach_port_t port;
160
161 if ((mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port)) != KERN_SUCCESS) {
162 break;
163 }
164 }
165 printf("[%d] Number of allocations: %lld\n", getpid(), i);
166
167 /* Signal to the parent that we're done allocating */
168 kill(getppid(), SIGUSR1);
169
170 while (1) {
171 pause();
172 }
173 }
174
175 static void print_zone_info(mach_zone_name_t *zn, mach_zone_info_t *zi)
176 {
177 T_LOG("ZONE NAME: %-35sSIZE: %-25lluELEMENTS: %llu",
178 zn->mzn_name, zi->mzi_cur_size, zi->mzi_count);
179 }
180
181 static void query_zone_info(void)
182 {
183 int i;
184 kern_return_t kr;
185 static uint64_t num_calls = 0;
186
187 for (i = 0; i < current_test.num_zones; i++) {
188 kr = mach_zone_info_for_zone(mach_host_self(), current_test.zone_names[i], &(zone_info_array[i]));
189 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_zone_info_for_zone(%s) returned %d [%s]", current_test.zone_names[i].mzn_name, kr, mach_error_string(kr));
190 }
191 kr = mach_zone_info_for_largest_zone(mach_host_self(), &largest_zone_name, &largest_zone_info);
192 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_zone_info_for_largest_zone returned %d [%s]", kr, mach_error_string(kr));
193
194 num_calls++;
195 if (num_calls % 10 != 0) {
196 return;
197 }
198
199 /* Print out size and element count for zones relevant to the test */
200 for (i = 0; i < current_test.num_zones; i++) {
201 print_zone_info(&(current_test.zone_names[i]), &(zone_info_array[i]));
202 }
203 }
204
205 static bool vme_zone_compares_to_vm_objects(void)
206 {
207 int i;
208 uint64_t vm_object_element_count = 0, vm_map_entry_element_count = 0;
209
210 T_LOG("Comparing element counts of \"VM map entries\" and \"vm objects\" zones");
211 for (i = 0; i < current_test.num_zones; i++) {
212 if (!strcmp(current_test.zone_names[i].mzn_name, VME_ZONE)) {
213 vm_map_entry_element_count = zone_info_array[i].mzi_count;
214 } else if (!strcmp(current_test.zone_names[i].mzn_name, VMOBJECTS_ZONE)) {
215 vm_object_element_count = zone_info_array[i].mzi_count;
216 }
217 print_zone_info(&(current_test.zone_names[i]), &(zone_info_array[i]));
218 }
219
220 T_LOG("# VM map entries as percentage of # vm objects = %llu", (vm_map_entry_element_count * 100)/ vm_object_element_count);
221 if (vm_map_entry_element_count >= ((vm_object_element_count * VMENTRY_TO_VMOBJECT_COMPARISON_RATIO) / 100)) {
222 T_LOG("Number of VM map entries is comparable to vm objects\n\n");
223 return true;
224 }
225 T_LOG("Number of VM map entries is NOT comparable to vm objects\n\n");
226 return false;
227 }
228
229 static bool verify_generic_jetsam_criteria(void)
230 {
231 T_LOG("Largest zone info");
232 print_zone_info(&largest_zone_name, &largest_zone_info);
233
234 /* If VM map entries is not the largest zone */
235 if (strcmp(largest_zone_name.mzn_name, VME_ZONE)) {
236 /* If vm objects is the largest zone and the VM map entries zone had comparable # of elements, return false */
237 if (!strcmp(largest_zone_name.mzn_name, VMOBJECTS_ZONE) && vme_zone_compares_to_vm_objects()) {
238 return false;
239 }
240 return true;
241 }
242 return false;
243 }
244
245 static void cleanup_and_end_test(void)
246 {
247 int i;
248
249 /*
250 * The atend handler executes on a different dispatch queue.
251 * We want to do the cleanup only once.
252 */
253 pthread_mutex_lock(&test_ending_mtx);
254 if (test_ending) {
255 pthread_mutex_unlock(&test_ending_mtx);
256 return;
257 }
258 test_ending = true;
259 pthread_mutex_unlock(&test_ending_mtx);
260
261 T_LOG("Number of processes spawned: %d", num_children);
262 T_LOG("Cleaning up...");
263
264 /* Disable the timer that queries and prints zone info periodically */
265 if (ds_timer != NULL && !within_dispatch_timer_handler) {
266 dispatch_source_cancel(ds_timer);
267 }
268
269 /* Disable signal handler that spawns child processes, only if we're not in the event handler's context */
270 if (ds_signal != NULL && !within_dispatch_signal_handler) {
271 dispatch_source_cancel_and_wait(ds_signal);
272 }
273
274 /* Kill all the child processes that were spawned */
275 for (i = 0; i < num_children; i++) {
276 kill(child_pids[i], SIGKILL);
277 }
278 for (i = 0; i < num_children; i++) {
279 int status = 0;
280 if (waitpid(child_pids[i], &status, 0) < 0) {
281 T_LOG("waitpid returned status %d", status);
282 }
283 }
284 sleep(1);
285
286 /* Force zone_gc before starting test for another zone or exiting */
287 mach_zone_force_gc(mach_host_self());
288
289 /* End ktrace session */
290 if (session != NULL) {
291 ktrace_end(session, 1);
292 }
293
294 for (i = 0; i < current_test.num_zones; i++) {
295 print_zone_info(&(current_test.zone_names[i]), &(zone_info_array[i]));
296 }
297 }
298
299 static void setup_ktrace_session(void)
300 {
301 int ret = 0;
302
303 T_LOG("Setting up ktrace session...");
304 session = ktrace_session_create();
305 T_QUIET; T_ASSERT_NOTNULL(session, "ktrace_session_create");
306
307 ktrace_set_interactive(session);
308
309 ktrace_set_completion_handler(session, ^{
310 ktrace_session_destroy(session);
311 T_END;
312 });
313
314 /* Listen for memorystatus_do_kill trace events */
315 ret = ktrace_events_single(session, (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_END, ^(ktrace_event_t event) {
316 int i;
317 bool received_jetsam_event = false;
318
319 /* We don't care about jetsams for any other reason except zone-map-exhaustion */
320 if (event->arg2 == kMemorystatusKilledZoneMapExhaustion) {
321 cleanup_and_end_test();
322 T_LOG("[memorystatus_do_kill] jetsam reason: zone-map-exhaustion, pid: %lu\n\n", event->arg1);
323 if (current_test.test_index == VME_ZONE_TEST || current_test.test_index == VM_OBJECTS_ZONE_TEST) {
324 /*
325 * For the VM map entries zone we try to kill the leaking process.
326 * Verify that we jetsammed one of the processes we spawned.
327 *
328 * For the vm objects zone we pick the leaking process via the VM map entries
329 * zone, if the number of vm objects and VM map entries are comparable.
330 * The test simulates this scenario, we should see a targeted jetsam for the
331 * vm objects zone too.
332 */
333 for (i = 0; i < num_children; i++) {
334 if (child_pids[i] == (pid_t)event->arg1) {
335 received_jetsam_event = true;
336 break;
337 }
338 }
339 /*
340 * If we didn't see a targeted jetsam, verify that the largest zone actually
341 * fulfilled the criteria for generic jetsams.
342 */
343 if (!received_jetsam_event && verify_generic_jetsam_criteria()) {
344 received_jetsam_event = true;
345 }
346 } else {
347 received_jetsam_event = true;
348 }
349
350 T_ASSERT_TRUE(received_jetsam_event, "Received zone-map-exhaustion jetsam event as expected");
351 }
352 });
353 T_QUIET; T_ASSERT_POSIX_ZERO(ret, "ktrace_events_single");
354
355 ret = ktrace_start(session, dispatch_get_main_queue());
356 T_QUIET; T_ASSERT_POSIX_ZERO(ret, "ktrace_start");
357 }
358
359 static void print_zone_map_size(void)
360 {
361 int ret;
362 uint64_t zstats[2];
363 size_t zstats_size = sizeof(zstats);
364
365 ret = sysctlbyname("kern.zone_map_size_and_capacity", &zstats, &zstats_size, NULL, 0);
366 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.zone_map_size_and_capacity failed");
367
368 T_LOG("Zone map capacity: %-30lldZone map size: %lld [%lld%% full]", zstats[1], zstats[0], (zstats[0] * 100)/zstats[1]);
369 }
370
371 static void spawn_child_process(void)
372 {
373 pid_t pid = -1;
374 char helper_func[50];
375 char *launch_tool_args[4];
376
377 T_QUIET; T_ASSERT_LT(num_children, MAX_CHILD_PROCS, "Spawned %d children. Timing out...", MAX_CHILD_PROCS);
378
379 strlcpy(helper_func, current_test.helper_func, sizeof(helper_func));
380 launch_tool_args[0] = testpath;
381 launch_tool_args[1] = "-n";
382 launch_tool_args[2] = helper_func;
383 launch_tool_args[3] = NULL;
384
385 /* Spawn the child process */
386 int rc = dt_launch_tool(&pid, launch_tool_args, false, NULL, NULL);
387 if (rc != 0) {
388 T_LOG("dt_launch tool returned %d with error code %d", rc, errno);
389 }
390 T_QUIET; T_ASSERT_POSIX_SUCCESS(pid, "dt_launch_tool");
391
392 child_pids[num_children++] = pid;
393 }
394
395 static void run_test(void)
396 {
397 uint64_t mem;
398 uint32_t testpath_buf_size, pages;
399 int ret, dev, pgsz;
400 size_t sysctl_size;
401
402 T_ATEND(cleanup_and_end_test);
403 T_SETUPBEGIN;
404
405 dev = 0;
406 sysctl_size = sizeof(dev);
407 ret = sysctlbyname("kern.development", &dev, &sysctl_size, NULL, 0);
408 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.development failed");
409 if (dev == 0) {
410 T_SKIP("Skipping test on release kernel");
411 }
412
413 testpath_buf_size = sizeof(testpath);
414 ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
415 T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
416 T_LOG("Executable path: %s", testpath);
417
418 sysctl_size = sizeof(mem);
419 ret = sysctlbyname("hw.memsize", &mem, &sysctl_size, NULL, 0);
420 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl hw.memsize failed");
421 T_LOG("hw.memsize: %llu", mem);
422
423 sysctl_size = sizeof(pgsz);
424 ret = sysctlbyname("vm.pagesize", &pgsz, &sysctl_size, NULL, 0);
425 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl vm.pagesize failed");
426 T_LOG("vm.pagesize: %d", pgsz);
427
428 sysctl_size = sizeof(pages);
429 ret = sysctlbyname("vm.pages", &pages, &sysctl_size, NULL, 0);
430 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl vm.pages failed");
431 T_LOG("vm.pages: %d", pages);
432
433 zone_info_array = (mach_zone_info_array_t) calloc((unsigned long)current_test.num_zones, sizeof *zone_info_array);
434
435 print_zone_map_size();
436
437 /*
438 * If the timeout specified by T_META_TIMEOUT is hit, the atend handler does not get called.
439 * So we're queueing a dispatch block to fire after TIMEOUT_SECS seconds, so we can exit cleanly.
440 */
441 dispatch_after(dispatch_time(DISPATCH_TIME_NOW, TIMEOUT_SECS * NSEC_PER_SEC), dispatch_get_main_queue(), ^{
442 T_ASSERT_FAIL("Timed out after %d seconds", TIMEOUT_SECS);
443 });
444
445 /*
446 * Create a dispatch source for the signal SIGUSR1. When a child is done allocating zone memory, it
447 * sends SIGUSR1 to the parent. Only then does the parent spawn another child. This prevents us from
448 * spawning many children at once and creating a lot of memory pressure.
449 */
450 signal(SIGUSR1, SIG_IGN);
451 ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dispatch_get_main_queue());
452 T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create: signal");
453
454 dispatch_source_set_event_handler(ds_signal, ^{
455 within_dispatch_signal_handler = true;
456 print_zone_map_size();
457
458 /* Wait a few seconds before spawning another child. Keeps us from allocating too aggressively */
459 sleep(5);
460 spawn_child_process();
461 within_dispatch_signal_handler = false;
462 });
463 dispatch_activate(ds_signal);
464
465 /* Timer to query jetsam-relevant zone info every second. Print it every 10 seconds. */
466 ds_timer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, dispatch_queue_create("timer_queue", NULL));
467 T_QUIET; T_ASSERT_NOTNULL(ds_timer, "dispatch_source_create: timer");
468 dispatch_source_set_timer(ds_timer, dispatch_time(DISPATCH_TIME_NOW, NSEC_PER_SEC), NSEC_PER_SEC, 0);
469
470 dispatch_source_set_event_handler(ds_timer, ^{
471 within_dispatch_timer_handler = true;
472 query_zone_info();
473 within_dispatch_timer_handler = false;
474 });
475 dispatch_activate(ds_timer);
476
477 /* Set up a ktrace session to listen for jetsam events */
478 setup_ktrace_session();
479
480 T_SETUPEND;
481
482 /* Spawn the first child process */
483 T_LOG("Spawning child processes to allocate zone memory...\n\n");
484 spawn_child_process();
485
486 dispatch_main();
487 }
488
489 static void move_to_idle_band(void)
490 {
491 memorystatus_priority_properties_t props;
492
493 /*
494 * We want to move the processes we spawn into the idle band, so that jetsam can target them first.
495 * This prevents other important BATS tasks from getting killed, specially in LTE where we have very few
496 * processes running.
497 *
498 * This is only needed for tests which (are likely to) lead us down the generic jetsam path.
499 */
500 props.priority = JETSAM_PRIORITY_IDLE;
501 props.user_data = 0;
502
503 if (memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES, getpid(), 0, &props, sizeof(props))) {
504 printf("memorystatus call to change jetsam priority failed\n");
505 exit(-1);
506 }
507 }
508
509 T_HELPER_DECL(allocate_vm_regions, "allocates VM regions")
510 {
511 allocate_vm_regions();
512 }
513
514 T_HELPER_DECL(allocate_vm_objects, "allocates VM objects and VM regions")
515 {
516 move_to_idle_band();
517 allocate_vm_objects();
518 }
519
520 T_HELPER_DECL(allocate_from_generic_zone, "allocates from a generic zone")
521 {
522 move_to_idle_band();
523 allocate_from_generic_zone();
524 }
525
526 /*
527 * T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL) changes the zone_map_jetsam_limit to a
528 * lower value, so that the test can complete faster.
529 * The test allocates zone memory pretty aggressively which can cause the system to panic
530 * if the jetsam limit is quite high; a lower value keeps us from panicking.
531 */
532 T_DECL( memorystatus_vme_zone_test,
533 "allocates elements from the VM map entries zone, verifies zone-map-exhaustion jetsams",
534 T_META_ASROOT(true),
535 T_META_TIMEOUT(1800),
536 /* T_META_LTEPHASE(LTE_POSTINIT),
537 */
538 T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL))
539 {
540 current_test = (test_config_struct) {
541 .test_index = VME_ZONE_TEST,
542 .helper_func = VME_ZONE_TEST_OPT,
543 .num_zones = 1,
544 .zone_names = (mach_zone_name_t []){
545 { .mzn_name = VME_ZONE }
546 }
547 };
548 run_test();
549 }
550
551 T_DECL( memorystatus_vm_objects_zone_test,
552 "allocates elements from the VM objects and the VM map entries zones, verifies zone-map-exhaustion jetsams",
553 T_META_ASROOT(true),
554 T_META_TIMEOUT(1800),
555 /* T_META_LTEPHASE(LTE_POSTINIT),
556 */
557 T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL))
558 {
559 current_test = (test_config_struct) {
560 .test_index = VM_OBJECTS_ZONE_TEST,
561 .helper_func = VM_OBJECTS_ZONE_TEST_OPT,
562 .num_zones = 2,
563 .zone_names = (mach_zone_name_t []){
564 { .mzn_name = VME_ZONE },
565 { .mzn_name = VMOBJECTS_ZONE}
566 }
567 };
568 run_test();
569 }
570
571 T_DECL( memorystatus_generic_zone_test,
572 "allocates elements from a zone that doesn't have an optimized jetsam path, verifies zone-map-exhaustion jetsams",
573 T_META_ASROOT(true),
574 T_META_TIMEOUT(1800),
575 /* T_META_LTEPHASE(LTE_POSTINIT),
576 */
577 T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL))
578 {
579 current_test = (test_config_struct) {
580 .test_index = GENERIC_ZONE_TEST,
581 .helper_func = GENERIC_ZONE_TEST_OPT,
582 .num_zones = 0,
583 .zone_names = NULL
584 };
585 run_test();
586 }