2 #include <mach/mach_vm.h>
3 #include <mach/mach_port.h>
4 #include <mach/mach_host.h>
5 #include <mach/mach_error.h>
6 #include <mach-o/dyld.h>
7 #include <sys/sysctl.h>
8 #include <sys/kdebug.h>
10 #include <sys/kern_memorystatus.h>
11 #include <ktrace/session.h>
12 #include <dispatch/private.h>
17 #include <darwintest.h>
18 #include <darwintest_utils.h>
/*
 * Test-wide darwintest metadata (namespace "xnu.vm", leak checking disabled) followed by the
 * tunables shared by the parent test and the child helpers: timeout, allocation sizes,
 * child-process cap, the sysctl setting applied to each test, helper names and zone names.
 */
21 T_META_NAMESPACE("xnu.vm"),
22 T_META_CHECK_LEAKS(false)
/* Seconds after which the dispatch_after block queued by the test driver fails the test */
25 #define TIMEOUT_SECS 1500
/*
 * Per-allocation sizes used by the child helpers. Two sets are defined; the first follows
 * "#if TARGET_OS_EMBEDDED", the second presumably belongs to the matching #else branch.
 */
27 #if TARGET_OS_EMBEDDED
28 #define ALLOCATION_SIZE_VM_REGION (16*1024) /* 16 KB */
29 #define ALLOCATION_SIZE_VM_OBJECT ALLOCATION_SIZE_VM_REGION
31 #define ALLOCATION_SIZE_VM_REGION (1024*1024*100) /* 100 MB */
32 #define ALLOCATION_SIZE_VM_OBJECT (16*1024) /* 16 KB */
/* Capacity of child_pids[]; spawn_child_process() asserts before exceeding it */
34 #define MAX_CHILD_PROCS 100
/* Passed to T_META_SYSCTL_INT in each T_DECL */
36 #define ZONEMAP_JETSAM_LIMIT_SYSCTL "kern.zone_map_jetsam_limit=60"
/* Helper names handed to the child via "-n"; they match the T_HELPER_DECL names */
38 #define VME_ZONE_TEST_OPT "allocate_vm_regions"
39 #define VM_OBJECTS_ZONE_TEST_OPT "allocate_vm_objects"
40 #define GENERIC_ZONE_TEST_OPT "allocate_from_generic_zone"
/* Zone names compared against mzn_name */
42 #define VME_ZONE "VM map entries"
43 #define VMOBJECTS_ZONE "vm objects"
/* Minimum percentage of the vm objects count that the VM map entries count must reach to be "comparable" */
44 #define VMENTRY_TO_VMOBJECT_COMPARISON_RATIO 98
/*
 * Configuration of the test currently running (stored in current_test).
 * Only two members are visible here; other code in this file also reads
 * current_test.test_index and current_test.num_zones.
 */
55 typedef struct test_config_struct
{
/* Name of the helper passed to the spawned child after "-n" */
58 const char *helper_func
;
/* Zones whose info is queried and printed while the test runs */
59 mach_zone_name_array_t zone_names
;
/* File-scope state shared by the test driver, its dispatch handlers and the teardown paths. */

/* Configuration assigned by each T_DECL before the test starts */
62 static test_config_struct current_test
;
/* Number of valid entries in child_pids[] */
63 static int num_children
= 0;
64 static bool test_ending
= false;
/* SIGUSR1 source; its handler spawns the next child */
65 static dispatch_source_t ds_signal
= NULL
;
/* Periodic timer source set to fire every second */
66 static dispatch_source_t ds_timer
= NULL
;
/* Serial queue targeted by ds_signal and used by both teardown paths */
67 static dispatch_queue_t dq_spawn
= NULL
;
68 static ktrace_session_t session
= NULL
;
/* One entry per zone in current_test.zone_names, filled by query_zone_info() */
70 static mach_zone_info_array_t zone_info_array
= NULL
;
/* Name and info of the largest zone, filled by query_zone_info() */
71 static mach_zone_name_t largest_zone_name
;
72 static mach_zone_info_t largest_zone_info
;
/* Path of this executable; used as argv[0] when launching children */
74 static char testpath
[PATH_MAX
];
/* Pids of the children spawned so far */
75 static pid_t child_pids
[MAX_CHILD_PROCS
];
/* Locked by cleanup_and_end_test() so that the cleanup runs only once */
76 static pthread_mutex_t test_ending_mtx
;
/* Forward declarations. The first three are the child-side allocators run via T_HELPER_DECL. */
78 static void allocate_vm_regions(void);
79 static void allocate_vm_objects(void);
80 static void allocate_from_generic_zone(void);
/* Parent-side teardown, setup and bookkeeping */
81 static void begin_test_teardown(void);
82 static void cleanup_and_end_test(void);
83 static void setup_ktrace_session(void);
84 static void spawn_child_process(void);
85 static void run_test(void);
/* Checks applied to a zone-map-exhaustion jetsam that did not hit one of our children */
86 static bool verify_generic_jetsam_criteria(void);
87 static bool vme_zone_compares_to_vm_objects(void);
/* Zone statistics helpers */
88 static void print_zone_map_size(void);
89 static void query_zone_info(void);
90 static void print_zone_info(mach_zone_name_t
*zn
, mach_zone_info_t
*zi
);
/* Declared extern here rather than taken from an included header */
92 extern void mach_zone_force_gc(host_t host
);
93 extern kern_return_t
mach_zone_info_for_largest_zone(
95 mach_zone_name_t
*name
,
96 mach_zone_info_t
*info
/*
 * Child helper: allocates VM regions of ALLOCATION_SIZE_VM_REGION bytes with mach_vm_allocate,
 * alternating the VM tag by the parity of the counter i. When an allocation does not return
 * KERN_SUCCESS it prints the allocation count and sends SIGUSR1 to the parent.
 * Afterwards it checks getppid() == 1 to notice that the parent has exited.
 * The enclosing loops are not visible in this view; presumably both the allocation and the
 * parent check repeat.
 *
 * NOTE(review): alloc_size and i are uint64_t but are printed with %lld; %llu would match.
 */
100 allocate_vm_regions(void)
102 uint64_t alloc_size
= ALLOCATION_SIZE_VM_REGION
, i
= 0;
104 printf("[%d] Allocating VM regions, each of size %lld KB\n", getpid(), (alloc_size
>> 10));
106 mach_vm_address_t addr
= (mach_vm_address_t
)NULL
;
108 /* Alternate VM tags between consecutive regions to prevent coalescing */
109 int flags
= VM_MAKE_TAG((i
% 2)? VM_TAG1
: VM_TAG2
) | VM_FLAGS_ANYWHERE
;
111 if ((mach_vm_allocate(mach_task_self(), &addr
, (mach_vm_size_t
)alloc_size
, flags
)) != KERN_SUCCESS
) {
115 printf("[%d] Number of allocations: %lld\n", getpid(), i
);
117 /* Signal to the parent that we're done allocating */
118 kill(getppid(), SIGUSR1
);
122 /* Exit if parent has exited. Ensures child processes don't linger around after the test exits */
123 if (getppid() == 1) {
/*
 * Child helper: same shape as allocate_vm_regions() but with ALLOCATION_SIZE_VM_OBJECT-sized
 * regions. Per the comments below, each region is touched so that a VM object is created
 * (the touching statement itself is not visible here), and the region is then handed to
 * madvise(MADV_FREE). On allocation failure it prints the count and sends SIGUSR1 to the
 * parent, then checks getppid() == 1 to notice that the parent has exited.
 *
 * NOTE(review): alloc_size and i are uint64_t but are printed with %lld; %llu would match.
 */
130 allocate_vm_objects(void)
132 uint64_t alloc_size
= ALLOCATION_SIZE_VM_OBJECT
, i
= 0;
134 printf("[%d] Allocating VM regions, each of size %lld KB, each backed by a VM object\n", getpid(), (alloc_size
>> 10));
136 mach_vm_address_t addr
= (mach_vm_address_t
)NULL
;
138 /* Alternate VM tags between consecutive regions to prevent coalescing */
139 int flags
= VM_MAKE_TAG((i
% 2)? VM_TAG1
: VM_TAG2
) | VM_FLAGS_ANYWHERE
;
141 if ((mach_vm_allocate(mach_task_self(), &addr
, (mach_vm_size_t
)alloc_size
, flags
)) != KERN_SUCCESS
) {
144 /* Touch the region so the VM object can actually be created */
146 /* OK to free this page. Keeps us from holding a lot of dirty pages */
147 madvise((void *)addr
, (size_t)alloc_size
, MADV_FREE
);
149 printf("[%d] Number of allocations: %lld\n", getpid(), i
);
151 /* Signal to the parent that we're done allocating */
152 kill(getppid(), SIGUSR1
);
156 /* Exit if parent has exited. Ensures child processes don't linger around after the test exits */
157 if (getppid() == 1) {
/*
 * Child helper: allocates Mach port receive rights with mach_port_allocate until a call does
 * not return KERN_SUCCESS, then prints the allocation count, sends SIGUSR1 to the parent and
 * checks getppid() == 1 to notice that the parent has exited.
 * The declarations of port and i and the loop headers are not visible in this view.
 */
164 allocate_from_generic_zone(void)
168 printf("[%d] Allocating mach_ports\n", getpid());
172 if ((mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE
, &port
)) != KERN_SUCCESS
) {
176 printf("[%d] Number of allocations: %lld\n", getpid(), i
);
178 /* Signal to the parent that we're done allocating */
179 kill(getppid(), SIGUSR1
);
183 /* Exit if parent has exited. Ensures child processes don't linger around after the test exits */
184 if (getppid() == 1) {
/*
 * Log one line for a zone: its name (zn->mzn_name), current size (zi->mzi_cur_size) and
 * element count (zi->mzi_count).
 *
 * zn: name record of the zone
 * zi: info record of the same zone
 */
191 print_zone_info(mach_zone_name_t
*zn
, mach_zone_info_t
*zi
)
193 T_LOG("ZONE NAME: %-35sSIZE: %-25lluELEMENTS: %llu",
194 zn
->mzn_name
, zi
->mzi_cur_size
, zi
->mzi_count
);
/*
 * Refresh the cached zone statistics: zone_info_array[i] for every zone in
 * current_test.zone_names, plus largest_zone_name / largest_zone_info.
 * Every Mach call is asserted to succeed.
 *
 * num_calls is static, so it persists across calls. The "% 10" check below presumably skips
 * the printing on most calls (the body of that branch is not visible here); the timer
 * comment in the test driver says the info is printed every 10 seconds.
 */
198 query_zone_info(void)
202 static uint64_t num_calls
= 0;
204 for (i
= 0; i
< current_test
.num_zones
; i
++) {
205 kr
= mach_zone_info_for_zone(mach_host_self(), current_test
.zone_names
[i
], &(zone_info_array
[i
]));
206 T_QUIET
; T_ASSERT_MACH_SUCCESS(kr
, "mach_zone_info_for_zone(%s) returned %d [%s]", current_test
.zone_names
[i
].mzn_name
, kr
, mach_error_string(kr
));
208 kr
= mach_zone_info_for_largest_zone(mach_host_self(), &largest_zone_name
, &largest_zone_info
);
209 T_QUIET
; T_ASSERT_MACH_SUCCESS(kr
, "mach_zone_info_for_largest_zone returned %d [%s]", kr
, mach_error_string(kr
));
212 if (num_calls
% 10 != 0) {
216 /* Print out size and element count for zones relevant to the test */
217 for (i
= 0; i
< current_test
.num_zones
; i
++) {
218 print_zone_info(&(current_test
.zone_names
[i
]), &(zone_info_array
[i
]));
223 vme_zone_compares_to_vm_objects(void)
226 uint64_t vm_object_element_count
= 0, vm_map_entry_element_count
= 0;
228 T_LOG("Comparing element counts of \"VM map entries\" and \"vm objects\" zones");
229 for (i
= 0; i
< current_test
.num_zones
; i
++) {
230 if (!strcmp(current_test
.zone_names
[i
].mzn_name
, VME_ZONE
)) {
231 vm_map_entry_element_count
= zone_info_array
[i
].mzi_count
;
232 } else if (!strcmp(current_test
.zone_names
[i
].mzn_name
, VMOBJECTS_ZONE
)) {
233 vm_object_element_count
= zone_info_array
[i
].mzi_count
;
235 print_zone_info(&(current_test
.zone_names
[i
]), &(zone_info_array
[i
]));
238 T_LOG("# VM map entries as percentage of # vm objects = %llu", (vm_map_entry_element_count
* 100) / vm_object_element_count
);
239 if (vm_map_entry_element_count
>= ((vm_object_element_count
* VMENTRY_TO_VMOBJECT_COMPARISON_RATIO
) / 100)) {
240 T_LOG("Number of VM map entries is comparable to vm objects\n\n");
243 T_LOG("Number of VM map entries is NOT comparable to vm objects\n\n");
/*
 * Check whether a jetsam that did not target one of our children is still acceptable, based
 * on largest_zone_name / largest_zone_info as last filled by query_zone_info().
 * Declared as returning bool. Only the rejecting condition is visible here: the largest zone
 * is not "VM map entries", it is "vm objects", and vme_zone_compares_to_vm_objects() holds.
 * Per the comment below, that case returns false; the remaining return paths are not
 * visible in this view.
 */
248 verify_generic_jetsam_criteria(void)
250 T_LOG("Largest zone info");
251 print_zone_info(&largest_zone_name
, &largest_zone_info
);
253 /* If VM map entries is not the largest zone */
254 if (strcmp(largest_zone_name
.mzn_name
, VME_ZONE
)) {
255 /* If vm objects is the largest zone and the VM map entries zone had comparable # of elements, return false */
256 if (!strcmp(largest_zone_name
.mzn_name
, VMOBJECTS_ZONE
) && vme_zone_compares_to_vm_objects()) {
/*
 * First stage of teardown, called from the ktrace event handler once a zone-map-exhaustion
 * jetsam has been seen. Ends the ktrace session if one exists, then synchronously runs a
 * block on dq_spawn that cancels the timer and signal sources when they are non-NULL.
 */
265 begin_test_teardown(void)
267 /* End ktrace session */
268 if (session
!= NULL
) {
269 T_LOG("Ending ktrace session...");
270 ktrace_end(session
, 1);
273 dispatch_sync(dq_spawn
, ^{
274 T_LOG("Cancelling dispatch sources...");
276 /* Disable the timer that queries and prints zone info periodically */
277 if (ds_timer
!= NULL
) {
278 dispatch_source_cancel(ds_timer
);
281 /* Disable signal handler that spawns child processes */
282 if (ds_signal
!= NULL
) {
284 * No need for a dispatch_source_cancel_and_wait here.
285 * We're queueing this on the spawn queue, so no further
286 * processes will be spawned after the source is cancelled.
288 dispatch_source_cancel(ds_signal
);
/*
 * Final cleanup, registered through T_ATEND by the test driver.
 * Takes test_ending_mtx so the cleanup happens only once; the two unlock calls presumably
 * belong to an "already ending" early-exit path and the normal path (the test_ending check
 * is not visible here). It then:
 *   - queues a block on dq_spawn that cancels the timer and signal sources,
 *   - sends SIGKILL to every recorded child and waits for each with waitpid,
 *   - forces a zone GC, ends the ktrace session if present,
 *   - prints the cached info of the configured zones when there are any.
 *
 * NOTE(review): when waitpid() returns < 0 the message logs `status`; whether `status` is
 * initialized is not visible here, and errno would describe the failure better. Confirm.
 */
294 cleanup_and_end_test(void)
299 * The atend handler executes on a different dispatch queue.
300 * We want to do the cleanup only once.
302 pthread_mutex_lock(&test_ending_mtx
);
304 pthread_mutex_unlock(&test_ending_mtx
);
308 pthread_mutex_unlock(&test_ending_mtx
);
310 dispatch_async(dq_spawn
, ^{
312 * If the test succeeds, we will call dispatch_source_cancel twice, which is fine since
313 * the operation is idempotent. Just make sure to not drop all references to the dispatch sources
314 * (in this case we're not, we have globals holding references to them), or we can end up with
315 * use-after-frees which would be a problem.
317 /* Disable the timer that queries and prints zone info periodically */
318 if (ds_timer
!= NULL
) {
319 dispatch_source_cancel(ds_timer
);
322 /* Disable signal handler that spawns child processes */
323 if (ds_signal
!= NULL
) {
324 dispatch_source_cancel(ds_signal
);
328 T_LOG("Number of processes spawned: %d", num_children
);
329 T_LOG("Killing child processes...");
331 /* Kill all the child processes that were spawned */
332 for (i
= 0; i
< num_children
; i
++) {
333 kill(child_pids
[i
], SIGKILL
);
335 * Sleep between kills to avoid hogging the VM map entries zone lock (on the task_terminate path).
336 * Without this we were seeing hw_lock_bit timeouts in BATS.
340 for (i
= 0; i
< num_children
; i
++) {
342 if (waitpid(child_pids
[i
], &status
, 0) < 0) {
343 T_LOG("waitpid returned status %d", status
);
348 /* Force zone_gc before starting test for another zone or exiting */
349 mach_zone_force_gc(mach_host_self());
351 /* End ktrace session */
352 if (session
!= NULL
) {
353 ktrace_end(session
, 1);
356 if (current_test
.num_zones
> 0) {
357 T_LOG("Relevant zone info at the end of the test:");
358 for (i
= 0; i
< current_test
.num_zones
; i
++) {
359 print_zone_info(&(current_test
.zone_names
[i
]), &(zone_info_array
[i
]));
/*
 * Create and start the ktrace session that watches for memorystatus_do_kill events.
 *
 * Handlers installed:
 *   - dropped events: fails the test, since an expected jetsam may have been missed;
 *   - completion: destroys the session;
 *   - BSD_MEMSTAT_DO_KILL (DBG_FUNC_END): only events whose arg2 equals
 *     kMemorystatusKilledZoneMapExhaustion are handled. Teardown is started first, then the
 *     event is judged. For the VME and vm-objects tests the killed pid (arg1) is looked up
 *     in child_pids, falling back to verify_generic_jetsam_criteria(). The other
 *     assignment of received_jetsam_event ("Received generic jetsam event") presumably
 *     covers the remaining test type; its branch header is not visible here.
 *     The result is asserted to be true.
 *
 * The session is started on the main dispatch queue.
 */
365 setup_ktrace_session(void)
369 T_LOG("Setting up ktrace session...");
370 session
= ktrace_session_create();
371 T_QUIET
; T_ASSERT_NOTNULL(session
, "ktrace_session_create");
373 ktrace_set_interactive(session
);
375 ktrace_set_dropped_events_handler(session
, ^{
376 T_FAIL("Dropped ktrace events; might have missed an expected jetsam event. Terminating early.");
379 ktrace_set_completion_handler(session
, ^{
380 ktrace_session_destroy(session
);
384 /* Listen for memorystatus_do_kill trace events */
385 ret
= ktrace_events_single(session
, (BSDDBG_CODE(DBG_BSD_MEMSTAT
, BSD_MEMSTAT_DO_KILL
)) | DBG_FUNC_END
, ^(ktrace_event_t event
) {
387 bool received_jetsam_event
= false;
389 /* We don't care about jetsams for any other reason except zone-map-exhaustion */
390 if (event
->arg2
== kMemorystatusKilledZoneMapExhaustion
) {
391 begin_test_teardown();
392 T_LOG("[memorystatus_do_kill] jetsam reason: zone-map-exhaustion, pid: %d\n\n", (int)event
->arg1
);
393 if (current_test
.test_index
== VME_ZONE_TEST
|| current_test
.test_index
== VM_OBJECTS_ZONE_TEST
) {
395 * For the VM map entries zone we try to kill the leaking process.
396 * Verify that we jetsammed one of the processes we spawned.
398 * For the vm objects zone we pick the leaking process via the VM map entries
399 * zone, if the number of vm objects and VM map entries are comparable.
400 * The test simulates this scenario, we should see a targeted jetsam for the
401 * vm objects zone too.
403 for (i
= 0; i
< num_children
; i
++) {
404 if (child_pids
[i
] == (pid_t
)event
->arg1
) {
405 received_jetsam_event
= true;
406 T_LOG("Received jetsam event for a child");
411 * If we didn't see a targeted jetsam, verify that the largest zone actually
412 * fulfilled the criteria for generic jetsams.
414 if (!received_jetsam_event
&& verify_generic_jetsam_criteria()) {
415 received_jetsam_event
= true;
416 T_LOG("Did not receive jetsam event for a child, but generic jetsam criteria holds");
419 received_jetsam_event
= true;
420 T_LOG("Received generic jetsam event");
423 T_QUIET
; T_ASSERT_TRUE(received_jetsam_event
, "Jetsam event not as expected");
426 T_QUIET
; T_ASSERT_POSIX_ZERO(ret
, "ktrace_events_single");
428 ret
= ktrace_start(session
, dispatch_get_main_queue());
429 T_QUIET
; T_ASSERT_POSIX_ZERO(ret
, "ktrace_start");
/*
 * Read the "kern.zone_map_size_and_capacity" sysctl into zstats and log the zone map
 * capacity (zstats[1]), its current size (zstats[0]) and the fill percentage.
 * The sysctl call is asserted to succeed. The declaration of zstats is not visible here.
 *
 * NOTE(review): the percentage divides by zstats[1] without a zero check.
 */
433 print_zone_map_size(void)
437 size_t zstats_size
= sizeof(zstats
);
439 ret
= sysctlbyname("kern.zone_map_size_and_capacity", &zstats
, &zstats_size
, NULL
, 0);
440 T_QUIET
; T_ASSERT_POSIX_SUCCESS(ret
, "sysctl kern.zone_map_size_and_capacity failed");
442 T_LOG("Zone map capacity: %-30lldZone map size: %lld [%lld%% full]", zstats
[1], zstats
[0], (zstats
[0] * 100) / zstats
[1]);
/*
 * Launch one more child running the current test's helper and record its pid.
 * Asserts that fewer than MAX_CHILD_PROCS children exist, so child_pids[] cannot overflow.
 * The child is this same executable (testpath) invoked as: testpath -n <helper_func>.
 *
 * NOTE(review): the return value of dt_launch_tool is stored in rc, but the assertion below
 * is made on pid. Whether pid is pre-initialized to a failing value is not visible here.
 * Confirm that this catches a failed launch.
 */
446 spawn_child_process(void)
/* Local, writable copy of the helper name; strlcpy bounds it to the buffer size */
449 char helper_func
[50];
450 char *launch_tool_args
[4];
452 T_QUIET
; T_ASSERT_LT(num_children
, MAX_CHILD_PROCS
, "Spawned %d children. Timing out...", MAX_CHILD_PROCS
);
454 strlcpy(helper_func
, current_test
.helper_func
, sizeof(helper_func
));
455 launch_tool_args
[0] = testpath
;
456 launch_tool_args
[1] = "-n";
457 launch_tool_args
[2] = helper_func
;
458 launch_tool_args
[3] = NULL
;
460 /* Spawn the child process */
461 int rc
= dt_launch_tool(&pid
, launch_tool_args
, false, NULL
, NULL
);
463 T_LOG("dt_launch tool returned %d with error code %d", rc
, errno
);
465 T_QUIET
; T_ASSERT_POSIX_SUCCESS(pid
, "dt_launch_tool");
467 child_pids
[num_children
++] = pid
;
/*
 * Body of the shared test driver. The function header is not visible in this view; it is
 * presumably run_test(), the only declared function not otherwise accounted for.
 *
 * Sequence visible here:
 *   1. register cleanup_and_end_test as the at-end handler;
 *   2. read kern.development; the T_SKIP below presumably runs when it reports a release
 *      kernel (the condition is not visible here);
 *   3. resolve the executable path and log hw.memsize, vm.pagesize and vm.pages;
 *   4. allocate zone_info_array with one zeroed entry per configured zone;
 *   5. queue a self-imposed timeout on the main queue after TIMEOUT_SECS seconds;
 *   6. ignore SIGUSR1 via signal() and handle it through a dispatch source on the serial
 *      queue dq_spawn, whose handler prints the zone map size and spawns another child;
 *   7. start a one-second timer source on its own queue (its handler body is not visible);
 *   8. set up the ktrace session and spawn the first child.
 *
 * NOTE(review): no NULL check of the calloc result is visible before zone_info_array is used.
 */
474 uint32_t testpath_buf_size
, pages
;
478 T_ATEND(cleanup_and_end_test
);
482 sysctl_size
= sizeof(dev
);
483 ret
= sysctlbyname("kern.development", &dev
, &sysctl_size
, NULL
, 0);
484 T_QUIET
; T_ASSERT_POSIX_SUCCESS(ret
, "sysctl kern.development failed");
486 T_SKIP("Skipping test on release kernel");
489 testpath_buf_size
= sizeof(testpath
);
490 ret
= _NSGetExecutablePath(testpath
, &testpath_buf_size
);
491 T_QUIET
; T_ASSERT_POSIX_ZERO(ret
, "_NSGetExecutablePath");
492 T_LOG("Executable path: %s", testpath
);
494 sysctl_size
= sizeof(mem
);
495 ret
= sysctlbyname("hw.memsize", &mem
, &sysctl_size
, NULL
, 0);
496 T_QUIET
; T_ASSERT_POSIX_SUCCESS(ret
, "sysctl hw.memsize failed");
497 T_LOG("hw.memsize: %llu", mem
);
499 sysctl_size
= sizeof(pgsz
);
500 ret
= sysctlbyname("vm.pagesize", &pgsz
, &sysctl_size
, NULL
, 0);
501 T_QUIET
; T_ASSERT_POSIX_SUCCESS(ret
, "sysctl vm.pagesize failed");
502 T_LOG("vm.pagesize: %d", pgsz
);
504 sysctl_size
= sizeof(pages
);
505 ret
= sysctlbyname("vm.pages", &pages
, &sysctl_size
, NULL
, 0);
506 T_QUIET
; T_ASSERT_POSIX_SUCCESS(ret
, "sysctl vm.pages failed");
507 T_LOG("vm.pages: %d", pages
);
509 zone_info_array
= (mach_zone_info_array_t
) calloc((unsigned long)current_test
.num_zones
, sizeof *zone_info_array
);
511 print_zone_map_size();
514 * If the timeout specified by T_META_TIMEOUT is hit, the atend handler does not get called.
515 * So we're queueing a dispatch block to fire after TIMEOUT_SECS seconds, so we can exit cleanly.
517 dispatch_after(dispatch_time(DISPATCH_TIME_NOW
, TIMEOUT_SECS
* NSEC_PER_SEC
), dispatch_get_main_queue(), ^{
518 T_ASSERT_FAIL("Timed out after %d seconds", TIMEOUT_SECS
);
522 * Create a dispatch source for the signal SIGUSR1. When a child is done allocating zone memory, it
523 * sends SIGUSR1 to the parent. Only then does the parent spawn another child. This prevents us from
524 * spawning many children at once and creating a lot of memory pressure.
526 signal(SIGUSR1
, SIG_IGN
);
527 dq_spawn
= dispatch_queue_create("spawn_queue", DISPATCH_QUEUE_SERIAL
);
528 ds_signal
= dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL
, SIGUSR1
, 0, dq_spawn
);
529 T_QUIET
; T_ASSERT_NOTNULL(ds_signal
, "dispatch_source_create: signal");
531 dispatch_source_set_event_handler(ds_signal
, ^{
532 print_zone_map_size();
534 /* Wait a few seconds before spawning another child. Keeps us from allocating too aggressively */
536 spawn_child_process();
538 dispatch_activate(ds_signal
);
540 /* Timer to query jetsam-relevant zone info every second. Print it every 10 seconds. */
541 ds_timer
= dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER
, 0, 0, dispatch_queue_create("timer_queue", NULL
));
542 T_QUIET
; T_ASSERT_NOTNULL(ds_timer
, "dispatch_source_create: timer");
543 dispatch_source_set_timer(ds_timer
, dispatch_time(DISPATCH_TIME_NOW
, NSEC_PER_SEC
), NSEC_PER_SEC
, 0);
545 dispatch_source_set_event_handler(ds_timer
, ^{
548 dispatch_activate(ds_timer
);
550 /* Set up a ktrace session to listen for jetsam events */
551 setup_ktrace_session();
555 /* Spawn the first child process */
556 T_LOG("Spawning child processes to allocate zone memory...\n\n");
557 spawn_child_process();
/*
 * Set the calling process's jetsam priority to JETSAM_PRIORITY_IDLE through
 * memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES). A failure of that call is
 * only reported with printf. Any other initialization of props is not visible in this view.
 */
563 move_to_idle_band(void)
565 memorystatus_priority_properties_t props
;
568 * We want to move the processes we spawn into the idle band, so that jetsam can target them first.
569 * This prevents other important BATS tasks from getting killed, specially in LTE where we have very few
572 * This is only needed for tests which (are likely to) lead us down the generic jetsam path.
574 props
.priority
= JETSAM_PRIORITY_IDLE
;
577 if (memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES
, getpid(), 0, &props
, sizeof(props
))) {
578 printf("memorystatus call to change jetsam priority failed\n");
/* Child entry point selected with "-n allocate_vm_regions"; runs allocate_vm_regions(). */
583 T_HELPER_DECL(allocate_vm_regions
, "allocates VM regions")
585 allocate_vm_regions();
/* Child entry point selected with "-n allocate_vm_objects"; runs allocate_vm_objects(). */
588 T_HELPER_DECL(allocate_vm_objects
, "allocates VM objects and VM regions")
591 allocate_vm_objects();
/* Child entry point selected with "-n allocate_from_generic_zone"; runs allocate_from_generic_zone(). */
594 T_HELPER_DECL(allocate_from_generic_zone
, "allocates from a generic zone")
597 allocate_from_generic_zone();
601 * T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL) changes the zone_map_jetsam_limit to a
602 * lower value, so that the test can complete faster.
603 * The test allocates zone memory pretty aggressively which can cause the system to panic
604 * if the jetsam limit is quite high; a lower value keeps us from panicking.
/*
 * Test for the "VM map entries" zone. Configures current_test with the VME_ZONE_TEST index,
 * the allocate_vm_regions helper and VME_ZONE as the only listed zone.
 * T_META_TIMEOUT(1800) is larger than TIMEOUT_SECS, so the self-imposed timeout queued by
 * the test driver is due first. The call into the shared driver is not visible in this view.
 */
606 T_DECL( memorystatus_vme_zone_test
,
607 "allocates elements from the VM map entries zone, verifies zone-map-exhaustion jetsams",
609 T_META_TIMEOUT(1800),
610 /* T_META_LTEPHASE(LTE_POSTINIT),
612 T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL
))
614 current_test
= (test_config_struct
) {
615 .test_index
= VME_ZONE_TEST
,
616 .helper_func
= VME_ZONE_TEST_OPT
,
618 .zone_names
= (mach_zone_name_t
[]){
619 { .mzn_name
= VME_ZONE
}
/*
 * Test for the "vm objects" zone. Configures current_test with the VM_OBJECTS_ZONE_TEST
 * index, the allocate_vm_objects helper and two listed zones: VME_ZONE and VMOBJECTS_ZONE
 * (both are read by vme_zone_compares_to_vm_objects()).
 * The call into the shared driver is not visible in this view.
 */
625 T_DECL( memorystatus_vm_objects_zone_test
,
626 "allocates elements from the VM objects and the VM map entries zones, verifies zone-map-exhaustion jetsams",
628 T_META_TIMEOUT(1800),
629 /* T_META_LTEPHASE(LTE_POSTINIT),
631 T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL
))
633 current_test
= (test_config_struct
) {
634 .test_index
= VM_OBJECTS_ZONE_TEST
,
635 .helper_func
= VM_OBJECTS_ZONE_TEST_OPT
,
637 .zone_names
= (mach_zone_name_t
[]){
638 { .mzn_name
= VME_ZONE
},
639 { .mzn_name
= VMOBJECTS_ZONE
}
645 T_DECL( memorystatus_generic_zone_test
,
646 "allocates elements from a zone that doesn't have an optimized jetsam path, verifies zone-map-exhaustion jetsams",
648 T_META_TIMEOUT(1800),
649 /* T_META_LTEPHASE(LTE_POSTINIT),
651 T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL
))
653 current_test
= (test_config_struct
) {
654 .test_index
= GENERIC_ZONE_TEST
,
655 .helper_func
= GENERIC_ZONE_TEST_OPT
,