2 #include <mach/mach_vm.h>
3 #include <mach/mach_port.h>
4 #include <mach/mach_host.h>
5 #include <mach/mach_error.h>
6 #include <mach-o/dyld.h>
7 #include <sys/sysctl.h>
8 #include <sys/kdebug.h>
10 #include <sys/kern_memorystatus.h>
11 #include <ktrace/session.h>
12 #include <dispatch/private.h>
17 #include <darwintest.h>
18 #include <darwintest_utils.h>
21 T_META_NAMESPACE("xnu.vm"),
22 T_META_CHECK_LEAKS(false)
/*
 * Test tunables.
 *
 * TIMEOUT_SECS is the delay of the self-imposed dispatch_after() timeout armed in run_test().
 * ALLOCATION_SIZE_VM_REGION / ALLOCATION_SIZE_VM_OBJECT are the per-allocation sizes used by
 * allocate_vm_regions() / allocate_vm_objects(). MAX_CHILD_PROCS sizes child_pids[] and is the
 * limit spawn_child_process() asserts against.
 * The *_TEST_OPT strings are the helper names handed to child processes after "-n"; VME_ZONE and
 * VMOBJECTS_ZONE are the zone names compared against mzn_name.
 * VMENTRY_TO_VMOBJECT_COMPARISON_RATIO is the percentage of the "vm objects" element count that
 * the "VM map entries" element count must reach to be treated as comparable.
 *
 * NOTE(review): ALLOCATION_SIZE_VM_REGION and ALLOCATION_SIZE_VM_OBJECT are each defined twice
 * below and only the opening "#if TARGET_OS_EMBEDDED" is visible -- confirm the matching
 * #else / #endif are present.
 */
25 #define TIMEOUT_SECS 1500
27 #if TARGET_OS_EMBEDDED
28 #define ALLOCATION_SIZE_VM_REGION (16*1024) /* 16 KB */
29 #define ALLOCATION_SIZE_VM_OBJECT ALLOCATION_SIZE_VM_REGION
31 #define ALLOCATION_SIZE_VM_REGION (1024*1024*100) /* 100 MB */
32 #define ALLOCATION_SIZE_VM_OBJECT (16*1024) /* 16 KB */
34 #define MAX_CHILD_PROCS 100
36 #define ZONEMAP_JETSAM_LIMIT_SYSCTL "kern.zone_map_jetsam_limit=60"
38 #define VME_ZONE_TEST_OPT "allocate_vm_regions"
39 #define VM_OBJECTS_ZONE_TEST_OPT "allocate_vm_objects"
40 #define GENERIC_ZONE_TEST_OPT "allocate_from_generic_zone"
42 #define VME_ZONE "VM map entries"
43 #define VMOBJECTS_ZONE "vm objects"
44 #define VMENTRY_TO_VMOBJECT_COMPARISON_RATIO 98
/*
 * Describes the test case being run. Visible members: helper_func (the helper name that
 * spawn_child_process() passes to each child) and zone_names (the zones that query_zone_info()
 * samples).
 * NOTE(review): other code in this file also reads .test_index and .num_zones; their
 * declarations and the end of this typedef are not visible here -- confirm.
 */
55 typedef struct test_config_struct
{
58 const char *helper_func
;
59 mach_zone_name_array_t zone_names
;
/*
 * File-scope test state.
 *
 *   current_test                  - configuration of the running test (set by each T_DECL).
 *   num_children / child_pids     - count and pids of helpers launched by spawn_child_process().
 *   test_ending / test_ending_mtx - test_ending_mtx is locked in cleanup_and_end_test();
 *                                   test_ending is presumably the run-once flag it guards -- confirm.
 *   within_dispatch_*_handler     - set while the matching dispatch event handler runs; checked
 *                                   by cleanup_and_end_test() before cancelling that source.
 *   ds_signal / ds_timer          - SIGUSR1 and periodic-timer dispatch sources made in run_test().
 *   session                       - ktrace session created in setup_ktrace_session().
 *   zone_info_array, largest_zone_name, largest_zone_info
 *                                 - results filled in by query_zone_info().
 *   testpath                      - executable path from _NSGetExecutablePath(), used as argv[0]
 *                                   for child processes.
 *
 * NOTE(review): no initializer or pthread_mutex_init() call for test_ending_mtx is visible in
 * this file -- confirm it is initialized before cleanup_and_end_test() locks it.
 * NOTE(review): the within_dispatch_* flags are plain bools; the timer handler runs on its own
 * "timer_queue" while other code uses the main queue -- confirm unsynchronized access is intended.
 */
62 static test_config_struct current_test
;
63 static int num_children
= 0;
64 static bool test_ending
= false;
65 static bool within_dispatch_signal_handler
= false;
66 static bool within_dispatch_timer_handler
= false;
67 static dispatch_source_t ds_signal
= NULL
;
68 static dispatch_source_t ds_timer
= NULL
;
69 static ktrace_session_t session
= NULL
;
71 static mach_zone_info_array_t zone_info_array
= NULL
;
72 static mach_zone_name_t largest_zone_name
;
73 static mach_zone_info_t largest_zone_info
;
75 static char testpath
[PATH_MAX
];
76 static pid_t child_pids
[MAX_CHILD_PROCS
];
77 static pthread_mutex_t test_ending_mtx
;
/*
 * Forward declarations for the file-local helpers, followed by hand-written extern
 * declarations for mach_zone_force_gc() and mach_zone_info_for_largest_zone().
 * NOTE(review): the externs are presumably declared here because no included header
 * provides them -- confirm, and keep them in sync with the kernel interface.
 */
79 static void allocate_vm_regions(void);
80 static void allocate_vm_objects(void);
81 static void allocate_from_generic_zone(void);
82 static void cleanup_and_end_test(void);
83 static void setup_ktrace_session(void);
84 static void spawn_child_process(void);
85 static void run_test(void);
86 static bool verify_generic_jetsam_criteria(void);
87 static bool vme_zone_compares_to_vm_objects(void);
88 static void print_zone_map_size(void);
89 static void query_zone_info(void);
90 static void print_zone_info(mach_zone_name_t
*zn
, mach_zone_info_t
*zi
);
92 extern void mach_zone_force_gc(host_t host
);
93 extern kern_return_t
mach_zone_info_for_largest_zone(
95 mach_zone_name_t
*name
,
96 mach_zone_info_t
*info
/*
 * Child-side helper: allocates VM regions of ALLOCATION_SIZE_VM_REGION bytes in the calling
 * task with mach_vm_allocate(VM_FLAGS_ANYWHERE), choosing VM_TAG1 or VM_TAG2 from the parity
 * of the counter i so that neighbouring regions are not coalesced. A failed allocation is
 * detected by comparing against KERN_SUCCESS; afterwards the allocation count is printed and
 * SIGUSR1 is sent to the parent process.
 * NOTE(review): alloc_size and i are uint64_t but are printed with %lld; %llu (or PRIu64)
 * would match the type.
 * NOTE(review): the return value of kill() is ignored -- presumably deliberate best-effort.
 */
99 static void allocate_vm_regions(void)
101 uint64_t alloc_size
= ALLOCATION_SIZE_VM_REGION
, i
= 0;
103 printf("[%d] Allocating VM regions, each of size %lld KB\n", getpid(), (alloc_size
>>10));
105 mach_vm_address_t addr
= (mach_vm_address_t
)NULL
;
107 /* Alternate VM tags between consecutive regions to prevent coalescing */
108 int flags
= VM_MAKE_TAG((i
% 2)? VM_TAG1
: VM_TAG2
) | VM_FLAGS_ANYWHERE
;
110 if ((mach_vm_allocate(mach_task_self(), &addr
, (mach_vm_size_t
)alloc_size
, flags
)) != KERN_SUCCESS
) {
114 printf("[%d] Number of allocations: %lld\n", getpid(), i
);
116 /* Signal to the parent that we're done allocating */
117 kill(getppid(), SIGUSR1
);
/*
 * Child-side helper: like allocate_vm_regions(), but uses ALLOCATION_SIZE_VM_OBJECT-sized
 * regions and, per the comments below, touches each region so that a VM object is created,
 * then hands the pages back with madvise(MADV_FREE) to avoid holding many dirty pages.
 * When done it prints the allocation count and sends SIGUSR1 to the parent process.
 * NOTE(review): alloc_size and i are uint64_t but are printed with %lld; %llu (or PRIu64)
 * would match the type.
 * NOTE(review): the return value of madvise() is ignored -- presumably deliberate best-effort.
 */
124 static void allocate_vm_objects(void)
126 uint64_t alloc_size
= ALLOCATION_SIZE_VM_OBJECT
, i
= 0;
128 printf("[%d] Allocating VM regions, each of size %lld KB, each backed by a VM object\n", getpid(), (alloc_size
>>10));
130 mach_vm_address_t addr
= (mach_vm_address_t
)NULL
;
132 /* Alternate VM tags between consecutive regions to prevent coalescing */
133 int flags
= VM_MAKE_TAG((i
% 2)? VM_TAG1
: VM_TAG2
) | VM_FLAGS_ANYWHERE
;
135 if ((mach_vm_allocate(mach_task_self(), &addr
, (mach_vm_size_t
)alloc_size
, flags
)) != KERN_SUCCESS
) {
138 /* Touch the region so the VM object can actually be created */
140 /* OK to free this page. Keeps us from holding a lot of dirty pages */
141 madvise((void *)addr
, (size_t)alloc_size
, MADV_FREE
);
143 printf("[%d] Number of allocations: %lld\n", getpid(), i
);
145 /* Signal to the parent that we're done allocating */
146 kill(getppid(), SIGUSR1
);
/*
 * Child-side helper: allocates Mach port receive rights in the calling task with
 * mach_port_allocate(MACH_PORT_RIGHT_RECEIVE), checking each call against KERN_SUCCESS.
 * Afterwards it prints the allocation count and sends SIGUSR1 to the parent process.
 * NOTE(review): the declarations of port and i are not visible here; if i is unsigned
 * (as in the sibling helpers) the %lld conversion should be %llu -- confirm.
 */
153 static void allocate_from_generic_zone(void)
157 printf("[%d] Allocating mach_ports\n", getpid());
161 if ((mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE
, &port
)) != KERN_SUCCESS
) {
165 printf("[%d] Number of allocations: %lld\n", getpid(), i
);
167 /* Signal to the parent that we're done allocating */
168 kill(getppid(), SIGUSR1
);
/*
 * Logs one zone via T_LOG: its name (zn->mzn_name), current size (zi->mzi_cur_size) and
 * element count (zi->mzi_count).
 *
 * zn: zone name record to print; dereferenced unconditionally, so it must not be NULL.
 * zi: zone info record to print; dereferenced unconditionally, so it must not be NULL.
 */
175 static void print_zone_info(mach_zone_name_t
*zn
, mach_zone_info_t
*zi
)
177 T_LOG("ZONE NAME: %-35sSIZE: %-25lluELEMENTS: %llu",
178 zn
->mzn_name
, zi
->mzi_cur_size
, zi
->mzi_count
);
/*
 * Refreshes the cached zone statistics:
 *   - for each of current_test.num_zones zones, calls mach_zone_info_for_zone() and stores the
 *     result in zone_info_array[i], asserting Mach success;
 *   - calls mach_zone_info_for_largest_zone() to fill largest_zone_name / largest_zone_info,
 *     again asserting Mach success.
 * The static num_calls counter is tested modulo 10 before the per-zone print loop, so the
 * zones are presumably printed only on every 10th call (the branch body is not visible here --
 * confirm).
 * NOTE(review): zone_info_array is indexed without a NULL check; it comes from an unchecked
 * calloc() in run_test().
 */
181 static void query_zone_info(void)
185 static uint64_t num_calls
= 0;
187 for (i
= 0; i
< current_test
.num_zones
; i
++) {
188 kr
= mach_zone_info_for_zone(mach_host_self(), current_test
.zone_names
[i
], &(zone_info_array
[i
]));
189 T_QUIET
; T_ASSERT_MACH_SUCCESS(kr
, "mach_zone_info_for_zone(%s) returned %d [%s]", current_test
.zone_names
[i
].mzn_name
, kr
, mach_error_string(kr
));
191 kr
= mach_zone_info_for_largest_zone(mach_host_self(), &largest_zone_name
, &largest_zone_info
);
192 T_QUIET
; T_ASSERT_MACH_SUCCESS(kr
, "mach_zone_info_for_largest_zone returned %d [%s]", kr
, mach_error_string(kr
));
195 if (num_calls
% 10 != 0) {
199 /* Print out size and element count for zones relevant to the test */
200 for (i
= 0; i
< current_test
.num_zones
; i
++) {
201 print_zone_info(&(current_test
.zone_names
[i
]), &(zone_info_array
[i
]));
/*
 * Compares the element counts of the "VM map entries" and "vm objects" zones, taken from the
 * cached zone_info_array entries whose names match VME_ZONE / VMOBJECTS_ZONE, and logs whether
 * the VM map entries count reaches VMENTRY_TO_VMOBJECT_COMPARISON_RATIO percent of the
 * vm objects count.
 *
 * Returns a bool; presumably true for "comparable" and false otherwise (the return statements
 * are not visible here -- confirm).
 *
 * NOTE(review): both counters start at 0 and are set only if the matching zone name is found
 * in current_test.zone_names. The percentage logged below divides by vm_object_element_count,
 * so a test whose zone list lacks "vm objects" (memorystatus_vme_zone_test lists only VME_ZONE),
 * or a zero element count, would divide by zero -- confirm and guard.
 */
205 static bool vme_zone_compares_to_vm_objects(void)
208 uint64_t vm_object_element_count
= 0, vm_map_entry_element_count
= 0;
210 T_LOG("Comparing element counts of \"VM map entries\" and \"vm objects\" zones");
211 for (i
= 0; i
< current_test
.num_zones
; i
++) {
212 if (!strcmp(current_test
.zone_names
[i
].mzn_name
, VME_ZONE
)) {
213 vm_map_entry_element_count
= zone_info_array
[i
].mzi_count
;
214 } else if (!strcmp(current_test
.zone_names
[i
].mzn_name
, VMOBJECTS_ZONE
)) {
215 vm_object_element_count
= zone_info_array
[i
].mzi_count
;
217 print_zone_info(&(current_test
.zone_names
[i
]), &(zone_info_array
[i
]));
220 T_LOG("# VM map entries as percentage of # vm objects = %llu", (vm_map_entry_element_count
* 100)/ vm_object_element_count
);
221 if (vm_map_entry_element_count
>= ((vm_object_element_count
* VMENTRY_TO_VMOBJECT_COMPARISON_RATIO
) / 100)) {
222 T_LOG("Number of VM map entries is comparable to vm objects\n\n");
225 T_LOG("Number of VM map entries is NOT comparable to vm objects\n\n");
/*
 * Logs the cached largest-zone record and checks whether a generic (non-targeted) jetsam was
 * justified: the largest zone must not be "VM map entries", and, per the comment below, the
 * check fails when the largest zone is "vm objects" while vme_zone_compares_to_vm_objects()
 * reports comparable counts.
 * Reads largest_zone_name / largest_zone_info, so query_zone_info() must have populated them.
 * NOTE(review): the return statements are not visible here; the true/false mapping is
 * inferred from the comments -- confirm.
 */
229 static bool verify_generic_jetsam_criteria(void)
231 T_LOG("Largest zone info");
232 print_zone_info(&largest_zone_name
, &largest_zone_info
);
234 /* If VM map entries is not the largest zone */
235 if (strcmp(largest_zone_name
.mzn_name
, VME_ZONE
)) {
236 /* If vm objects is the largest zone and the VM map entries zone had comparable # of elements, return false */
237 if (!strcmp(largest_zone_name
.mzn_name
, VMOBJECTS_ZONE
) && vme_zone_compares_to_vm_objects()) {
/*
 * Tears the test down. Registered with T_ATEND() in run_test() and also called directly from
 * the ktrace event handler, so test_ending_mtx is taken first; per the comment below the
 * cleanup is meant to run only once.
 *
 * Steps visible here, in order:
 *   1. cancel ds_timer and ds_signal, each only when it exists and we are not currently
 *      inside that source's own event handler;
 *   2. SIGKILL every recorded child, then waitpid() each one;
 *   3. force a zone GC with mach_zone_force_gc();
 *   4. end the ktrace session if one exists;
 *   5. print the cached info for each zone of the current test.
 *
 * NOTE(review): when waitpid() fails (< 0) the log prints `status`, which a failed call does
 * not set; errno would be the meaningful value -- confirm and fix.
 * NOTE(review): the return values of kill() are ignored -- presumably deliberate best-effort.
 * NOTE(review): ds_timer is cancelled with dispatch_source_cancel() while ds_signal uses
 * dispatch_source_cancel_and_wait() -- confirm the asymmetry is intended.
 */
245 static void cleanup_and_end_test(void)
250 * The atend handler executes on a different dispatch queue.
251 * We want to do the cleanup only once.
253 pthread_mutex_lock(&test_ending_mtx
);
255 pthread_mutex_unlock(&test_ending_mtx
);
259 pthread_mutex_unlock(&test_ending_mtx
);
261 T_LOG("Number of processes spawned: %d", num_children
);
262 T_LOG("Cleaning up...");
264 /* Disable the timer that queries and prints zone info periodically */
265 if (ds_timer
!= NULL
&& !within_dispatch_timer_handler
) {
266 dispatch_source_cancel(ds_timer
);
269 /* Disable signal handler that spawns child processes, only if we're not in the event handler's context */
270 if (ds_signal
!= NULL
&& !within_dispatch_signal_handler
) {
271 dispatch_source_cancel_and_wait(ds_signal
);
274 /* Kill all the child processes that were spawned */
275 for (i
= 0; i
< num_children
; i
++) {
276 kill(child_pids
[i
], SIGKILL
);
278 * Sleep between kills to avoid hogging the VM map entries zone lock (on the task_terminate path).
279 * Without this we were seeing hw_lock_bit timeouts in BATS.
283 for (i
= 0; i
< num_children
; i
++) {
285 if (waitpid(child_pids
[i
], &status
, 0) < 0) {
286 T_LOG("waitpid returned status %d", status
);
291 /* Force zone_gc before starting test for another zone or exiting */
292 mach_zone_force_gc(mach_host_self());
294 /* End ktrace session */
295 if (session
!= NULL
) {
296 ktrace_end(session
, 1);
299 for (i
= 0; i
< current_test
.num_zones
; i
++) {
300 print_zone_info(&(current_test
.zone_names
[i
]), &(zone_info_array
[i
]));
/*
 * Creates the global ktrace session, registers a completion handler that destroys it, and
 * subscribes to the end event of BSD_MEMSTAT_DO_KILL (memorystatus_do_kill). The session is
 * started on the main dispatch queue.
 *
 * Event handling, as visible here: only events whose arg2 equals
 * kMemorystatusKilledZoneMapExhaustion are acted upon. For those, cleanup_and_end_test() runs
 * first, the killed pid (arg1) is logged, and for the VME / vm-objects tests the pid is looked
 * up in child_pids[]; if it is not one of ours, verify_generic_jetsam_criteria() can still
 * accept the event. The result is asserted with T_ASSERT_TRUE.
 *
 * NOTE(review): event->arg1 is logged with %lu; confirm that matches the field's type on all
 * supported targets.
 * NOTE(review): the completion handler destroys `session` but the global is not reset to
 * NULL in the visible code -- confirm nothing uses it afterwards.
 */
304 static void setup_ktrace_session(void)
308 T_LOG("Setting up ktrace session...");
309 session
= ktrace_session_create();
310 T_QUIET
; T_ASSERT_NOTNULL(session
, "ktrace_session_create");
312 ktrace_set_interactive(session
);
314 ktrace_set_completion_handler(session
, ^{
315 ktrace_session_destroy(session
);
319 /* Listen for memorystatus_do_kill trace events */
320 ret
= ktrace_events_single(session
, (BSDDBG_CODE(DBG_BSD_MEMSTAT
, BSD_MEMSTAT_DO_KILL
)) | DBG_FUNC_END
, ^(ktrace_event_t event
) {
322 bool received_jetsam_event
= false;
324 /* We don't care about jetsams for any other reason except zone-map-exhaustion */
325 if (event
->arg2
== kMemorystatusKilledZoneMapExhaustion
) {
326 cleanup_and_end_test();
327 T_LOG("[memorystatus_do_kill] jetsam reason: zone-map-exhaustion, pid: %lu\n\n", event
->arg1
);
328 if (current_test
.test_index
== VME_ZONE_TEST
|| current_test
.test_index
== VM_OBJECTS_ZONE_TEST
) {
330 * For the VM map entries zone we try to kill the leaking process.
331 * Verify that we jetsammed one of the processes we spawned.
333 * For the vm objects zone we pick the leaking process via the VM map entries
334 * zone, if the number of vm objects and VM map entries are comparable.
335 * The test simulates this scenario, we should see a targeted jetsam for the
336 * vm objects zone too.
338 for (i
= 0; i
< num_children
; i
++) {
339 if (child_pids
[i
] == (pid_t
)event
->arg1
) {
340 received_jetsam_event
= true;
345 * If we didn't see a targeted jetsam, verify that the largest zone actually
346 * fulfilled the criteria for generic jetsams.
348 if (!received_jetsam_event
&& verify_generic_jetsam_criteria()) {
349 received_jetsam_event
= true;
352 received_jetsam_event
= true;
355 T_ASSERT_TRUE(received_jetsam_event
, "Received zone-map-exhaustion jetsam event as expected");
358 T_QUIET
; T_ASSERT_POSIX_ZERO(ret
, "ktrace_events_single");
360 ret
= ktrace_start(session
, dispatch_get_main_queue());
361 T_QUIET
; T_ASSERT_POSIX_ZERO(ret
, "ktrace_start");
/*
 * Reads the "kern.zone_map_size_and_capacity" sysctl into zstats and logs the capacity
 * (zstats[1]), the current size (zstats[0]) and the fill percentage.
 * Asserts that the sysctl call succeeds.
 * NOTE(review): the percentage divides by zstats[1] without a zero check; presumably the
 * capacity is never 0 -- confirm.
 * NOTE(review): zstats is printed with %lld; its declaration is not visible here -- confirm
 * the element type is a signed 64-bit integer.
 */
364 static void print_zone_map_size(void)
368 size_t zstats_size
= sizeof(zstats
);
370 ret
= sysctlbyname("kern.zone_map_size_and_capacity", &zstats
, &zstats_size
, NULL
, 0);
371 T_QUIET
; T_ASSERT_POSIX_SUCCESS(ret
, "sysctl kern.zone_map_size_and_capacity failed");
373 T_LOG("Zone map capacity: %-30lldZone map size: %lld [%lld%% full]", zstats
[1], zstats
[0], (zstats
[0] * 100)/zstats
[1]);
/*
 * Launches one more helper process and records its pid.
 *
 * Asserts that fewer than MAX_CHILD_PROCS children exist (which also keeps the child_pids[]
 * write in bounds), builds argv as { testpath, "-n", <helper name>, NULL }, launches it with
 * dt_launch_tool(), and appends the resulting pid to child_pids[], incrementing num_children.
 *
 * NOTE(review): the helper name is copied into a 50-byte buffer with strlcpy(); a longer
 * name would be silently truncated -- the current *_TEST_OPT strings all fit.
 * NOTE(review): errno is logged unconditionally after dt_launch_tool(), so it may be stale
 * when the call succeeded.
 * NOTE(review): success is asserted on `pid` rather than on the return code `rc`; the
 * declaration of pid is not visible here, so confirm it is initialized when the launch fails.
 */
376 static void spawn_child_process(void)
379 char helper_func
[50];
380 char *launch_tool_args
[4];
382 T_QUIET
; T_ASSERT_LT(num_children
, MAX_CHILD_PROCS
, "Spawned %d children. Timing out...", MAX_CHILD_PROCS
);
384 strlcpy(helper_func
, current_test
.helper_func
, sizeof(helper_func
));
385 launch_tool_args
[0] = testpath
;
386 launch_tool_args
[1] = "-n";
387 launch_tool_args
[2] = helper_func
;
388 launch_tool_args
[3] = NULL
;
390 /* Spawn the child process */
391 int rc
= dt_launch_tool(&pid
, launch_tool_args
, false, NULL
, NULL
);
393 T_LOG("dt_launch tool returned %d with error code %d", rc
, errno
);
395 T_QUIET
; T_ASSERT_POSIX_SUCCESS(pid
, "dt_launch_tool");
397 child_pids
[num_children
++] = pid
;
/*
 * Parent-side driver shared by all test cases; expects current_test to be filled in already.
 *
 * Visible steps, in order:
 *   1. register cleanup_and_end_test() as the at-end handler;
 *   2. read kern.development (the test is skipped on release kernels), resolve the executable
 *      path into testpath, and log hw.memsize, vm.pagesize and vm.pages;
 *   3. allocate zone_info_array with one entry per zone of the current test and print the
 *      zone map size;
 *   4. arm a dispatch_after() block on the main queue that fails the test after TIMEOUT_SECS;
 *   5. ignore SIGUSR1's default disposition and create a SIGUSR1 dispatch source on the main
 *      queue whose handler prints the zone map size and spawns the next child;
 *   6. create a one-second repeating timer source on its own "timer_queue";
 *   7. set up the ktrace session and spawn the first child.
 *
 * NOTE(review): the calloc() result is not checked in the visible code; query_zone_info()
 * writes through zone_info_array unconditionally.
 * NOTE(review): `pages` is uint32_t but is logged with %d (%u would match); the declaration
 * of pgsz is not visible -- confirm its format specifier too.
 * NOTE(review): the queue returned by dispatch_queue_create() is handed straight to
 * dispatch_source_create() and never released -- presumably acceptable for a test, confirm.
 * NOTE(review): the body of the timer handler between the two flag writes is not visible
 * here; per the comment it queries zone info every second.
 */
400 static void run_test(void)
403 uint32_t testpath_buf_size
, pages
;
407 T_ATEND(cleanup_and_end_test
);
411 sysctl_size
= sizeof(dev
);
412 ret
= sysctlbyname("kern.development", &dev
, &sysctl_size
, NULL
, 0);
413 T_QUIET
; T_ASSERT_POSIX_SUCCESS(ret
, "sysctl kern.development failed");
415 T_SKIP("Skipping test on release kernel");
418 testpath_buf_size
= sizeof(testpath
);
419 ret
= _NSGetExecutablePath(testpath
, &testpath_buf_size
);
420 T_QUIET
; T_ASSERT_POSIX_ZERO(ret
, "_NSGetExecutablePath");
421 T_LOG("Executable path: %s", testpath
);
423 sysctl_size
= sizeof(mem
);
424 ret
= sysctlbyname("hw.memsize", &mem
, &sysctl_size
, NULL
, 0);
425 T_QUIET
; T_ASSERT_POSIX_SUCCESS(ret
, "sysctl hw.memsize failed");
426 T_LOG("hw.memsize: %llu", mem
);
428 sysctl_size
= sizeof(pgsz
);
429 ret
= sysctlbyname("vm.pagesize", &pgsz
, &sysctl_size
, NULL
, 0);
430 T_QUIET
; T_ASSERT_POSIX_SUCCESS(ret
, "sysctl vm.pagesize failed");
431 T_LOG("vm.pagesize: %d", pgsz
);
433 sysctl_size
= sizeof(pages
);
434 ret
= sysctlbyname("vm.pages", &pages
, &sysctl_size
, NULL
, 0);
435 T_QUIET
; T_ASSERT_POSIX_SUCCESS(ret
, "sysctl vm.pages failed");
436 T_LOG("vm.pages: %d", pages
);
438 zone_info_array
= (mach_zone_info_array_t
) calloc((unsigned long)current_test
.num_zones
, sizeof *zone_info_array
);
440 print_zone_map_size();
443 * If the timeout specified by T_META_TIMEOUT is hit, the atend handler does not get called.
444 * So we're queueing a dispatch block to fire after TIMEOUT_SECS seconds, so we can exit cleanly.
446 dispatch_after(dispatch_time(DISPATCH_TIME_NOW
, TIMEOUT_SECS
* NSEC_PER_SEC
), dispatch_get_main_queue(), ^{
447 T_ASSERT_FAIL("Timed out after %d seconds", TIMEOUT_SECS
);
451 * Create a dispatch source for the signal SIGUSR1. When a child is done allocating zone memory, it
452 * sends SIGUSR1 to the parent. Only then does the parent spawn another child. This prevents us from
453 * spawning many children at once and creating a lot of memory pressure.
455 signal(SIGUSR1
, SIG_IGN
);
456 ds_signal
= dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL
, SIGUSR1
, 0, dispatch_get_main_queue());
457 T_QUIET
; T_ASSERT_NOTNULL(ds_signal
, "dispatch_source_create: signal");
459 dispatch_source_set_event_handler(ds_signal
, ^{
460 within_dispatch_signal_handler
= true;
461 print_zone_map_size();
463 /* Wait a few seconds before spawning another child. Keeps us from allocating too aggressively */
465 spawn_child_process();
466 within_dispatch_signal_handler
= false;
468 dispatch_activate(ds_signal
);
470 /* Timer to query jetsam-relevant zone info every second. Print it every 10 seconds. */
471 ds_timer
= dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER
, 0, 0, dispatch_queue_create("timer_queue", NULL
));
472 T_QUIET
; T_ASSERT_NOTNULL(ds_timer
, "dispatch_source_create: timer");
473 dispatch_source_set_timer(ds_timer
, dispatch_time(DISPATCH_TIME_NOW
, NSEC_PER_SEC
), NSEC_PER_SEC
, 0);
475 dispatch_source_set_event_handler(ds_timer
, ^{
476 within_dispatch_timer_handler
= true;
478 within_dispatch_timer_handler
= false;
480 dispatch_activate(ds_timer
);
482 /* Set up a ktrace session to listen for jetsam events */
483 setup_ktrace_session();
487 /* Spawn the first child process */
488 T_LOG("Spawning child processes to allocate zone memory...\n\n");
489 spawn_child_process();
/*
 * Moves the calling process into the idle jetsam band by setting its priority to
 * JETSAM_PRIORITY_IDLE through memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES).
 * A failure is only reported with printf(); the visible code does not abort on it.
 * NOTE(review): only props.priority is assigned in the visible code; confirm any remaining
 * members of props are initialized before the call.
 */
494 static void move_to_idle_band(void)
496 memorystatus_priority_properties_t props
;
499 * We want to move the processes we spawn into the idle band, so that jetsam can target them first.
500 * This prevents other important BATS tasks from getting killed, especially in LTE where we have very few
503 * This is only needed for tests which (are likely to) lead us down the generic jetsam path.
505 props
.priority
= JETSAM_PRIORITY_IDLE
;
508 if (memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES
, getpid(), 0, &props
, sizeof(props
))) {
509 printf("memorystatus call to change jetsam priority failed\n");
/*
 * darwintest helper entry point selected with "-n allocate_vm_regions" (VME_ZONE_TEST_OPT);
 * runs allocate_vm_regions() in the child process.
 */
514 T_HELPER_DECL(allocate_vm_regions
, "allocates VM regions")
516 allocate_vm_regions();
/*
 * darwintest helper entry point selected with "-n allocate_vm_objects"
 * (VM_OBJECTS_ZONE_TEST_OPT); runs allocate_vm_objects() in the child process.
 */
519 T_HELPER_DECL(allocate_vm_objects
, "allocates VM objects and VM regions")
522 allocate_vm_objects();
/*
 * darwintest helper entry point selected with "-n allocate_from_generic_zone"
 * (GENERIC_ZONE_TEST_OPT); runs allocate_from_generic_zone() in the child process.
 */
525 T_HELPER_DECL(allocate_from_generic_zone
, "allocates from a generic zone")
528 allocate_from_generic_zone();
532 * T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL) changes the zone_map_jetsam_limit to a
533 * lower value, so that the test can complete faster.
534 * The test allocates zone memory pretty aggressively which can cause the system to panic
535 * if the jetsam limit is quite high; a lower value keeps us from panicking.
/*
 * Test case for the "VM map entries" zone: configures current_test with VME_ZONE_TEST, the
 * allocate_vm_regions helper and a zone list containing only VME_ZONE. Runs with an 1800 s
 * darwintest timeout and the lowered kern.zone_map_jetsam_limit sysctl.
 * NOTE(review): the remainder of the body (presumably the num_zones value and the call to
 * run_test()) is not visible here -- confirm.
 */
537 T_DECL( memorystatus_vme_zone_test
,
538 "allocates elements from the VM map entries zone, verifies zone-map-exhaustion jetsams",
540 T_META_TIMEOUT(1800),
541 /* T_META_LTEPHASE(LTE_POSTINIT),
543 T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL
))
545 current_test
= (test_config_struct
) {
546 .test_index
= VME_ZONE_TEST
,
547 .helper_func
= VME_ZONE_TEST_OPT
,
549 .zone_names
= (mach_zone_name_t
[]){
550 { .mzn_name
= VME_ZONE
}
/*
 * Test case for the "vm objects" zone: configures current_test with VM_OBJECTS_ZONE_TEST, the
 * allocate_vm_objects helper and a zone list containing VME_ZONE and VMOBJECTS_ZONE (both are
 * needed by vme_zone_compares_to_vm_objects()). Runs with an 1800 s darwintest timeout and the
 * lowered kern.zone_map_jetsam_limit sysctl.
 * NOTE(review): the remainder of the body (presumably the num_zones value and the call to
 * run_test()) is not visible here -- confirm.
 */
556 T_DECL( memorystatus_vm_objects_zone_test
,
557 "allocates elements from the VM objects and the VM map entries zones, verifies zone-map-exhaustion jetsams",
559 T_META_TIMEOUT(1800),
560 /* T_META_LTEPHASE(LTE_POSTINIT),
562 T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL
))
564 current_test
= (test_config_struct
) {
565 .test_index
= VM_OBJECTS_ZONE_TEST
,
566 .helper_func
= VM_OBJECTS_ZONE_TEST_OPT
,
568 .zone_names
= (mach_zone_name_t
[]){
569 { .mzn_name
= VME_ZONE
},
570 { .mzn_name
= VMOBJECTS_ZONE
}
576 T_DECL( memorystatus_generic_zone_test
,
577 "allocates elements from a zone that doesn't have an optimized jetsam path, verifies zone-map-exhaustion jetsams",
579 T_META_TIMEOUT(1800),
580 /* T_META_LTEPHASE(LTE_POSTINIT),
582 T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL
))
584 current_test
= (test_config_struct
) {
585 .test_index
= GENERIC_ZONE_TEST
,
586 .helper_func
= GENERIC_ZONE_TEST_OPT
,