git.saurik.com Git - apple/xnu.git/blame_incremental - tests/memorystatus_vm_map

... / ...

Commit	Line	Data
	1	#include <stdio.h>
	2	#include <unistd.h>
	3	#include <stdlib.h>
	4	#include <errno.h>
	5	#include <string.h>
	6	#include <assert.h>
	7	#include <signal.h>
	8	#include <spawn.h>
	9	#include <spawn_private.h>
	10	#include <stdint.h>
	11	#include <sys/sysctl.h>
	12	#include <sys/spawn_internal.h>
	13	#include <sys/kern_memorystatus.h>
	14	#include <mach-o/dyld.h>
	15
	16	#include <darwintest.h>
	17	#include <darwintest_utils.h>
	18
	19	#include "test_utils.h"
	20
	21	T_GLOBAL_META(
	22	T_META_NAMESPACE("xnu.vm"),
	23	T_META_CHECK_LEAKS(false)
	24	);
	25
	26	extern char **environ;
	27
	28	/*
	29	* This test file contains two sub-tests which attempt to verify
	30	* the allowing or not allowing of a corpse for crashreporter when
	31	* a task exceeds its memory allocation limit. vm_map_fork() is the
	32	* kernel routine used to generate a corpse task.
	33	*
	34	* A corpse is allowed to be taken if a task's memory resource limit that
	35	* is exceeded is less than 1/4 of the system wide task limit.
	36	* If the amount exceeds 1/4 the system wide limit, then the corpse is disallowed.
	37	*
	38	* If the device under test is already under pressure, the test
	39	* could fail due to jetsam cutting in and killing the parent, child or
	40	* other necessary testing processes.
	41	*/
	42
	43	/* Test variants */
	44	#define TEST_ALLOWED 0x1
	45	#define TEST_NOT_ALLOWED 0x2
	46
	47	/*
	48	* Values which the kernel OR's into the PID when a corpse
	49	* is either allowed or disallowed for the
	50	* kern.memorystatus_vm_map_fork_pidwatch sysctl.
	51	*/
	52	#define MEMORYSTATUS_VM_MAP_FORK_ALLOWED 0x100000000ul
	53	#define MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED 0x200000000ul
	54
	55	/*
	56	* The memory allocation happens in a child process, this
	57	* is stuff to deal with creating and managing the child.
	58	* The child will only execute the T_HELPER_DECL.
	59	*/
	60	static char testpath[PATH_MAX];
	61	static uint32_t testpath_size = sizeof(testpath);
	62	#define LIMIT_DELTA_MB 5 /* an arbitrary limit delta */
	63	#define MEGABYTE (1024 * 1024)
	64
	/*
	 * The child process communicates back to the parent via its exit() code.
	 * NUM_CHILD_EXIT is a count sentinel used for bounds checks; it is not
	 * itself an exit code the child returns.
	 */
	enum child_exits {
		NORMAL_EXIT = 0,
		NO_MEMSIZE_ARG,
		INVALID_MEMSIZE,
		MALLOC_FAILED,
		NUM_CHILD_EXIT
	};

	/*
	 * Human readable reason for each child exit code, indexed by
	 * enum child_exits. The table is read-only and sized by NUM_CHILD_EXIT,
	 * and each entry is tied to its enumerator with a designated initializer
	 * so the two cannot silently drift apart.
	 */
	static const char * const child_exit_why[NUM_CHILD_EXIT] = {
		[NORMAL_EXIT]     = "normal exit",
		[NO_MEMSIZE_ARG]  = "no memsize argument to child",
		[INVALID_MEMSIZE] = "invalid memsize argument to child",
		[MALLOC_FAILED]   = "malloc() failed",
	};
	81
	82	/*
	83	* Set/Get the sysctl used to determine if corpse collection occurs.
	84	* This is done by the kernel checking for a specific PID.
	85	*/
	86	static void
	87	set_memorystatus_vm_map_fork_pidwatch(pid_t pid)
	88	{
	89	uint64_t new_value = (uint64_t)pid;
	90	size_t new_len = sizeof(new_value);
	91	int err;
	92
	93	err = sysctlbyname("kern.memorystatus_vm_map_fork_pidwatch", NULL, NULL, &new_value, new_len);
	94	T_QUIET;
	95	T_ASSERT_POSIX_SUCCESS(err, "set sysctlbyname(kern.memorystatus_vm_map_fork_pidwatch...) failed");
	96	return;
	97	}
	98
	99	static uint64_t
	100	get_memorystatus_vm_map_fork_pidwatch()
	101	{
	102	uint64_t value = 0;
	103	size_t val_len = sizeof(value);
	104	int err;
	105
	106	err = sysctlbyname("kern.memorystatus_vm_map_fork_pidwatch", &value, &val_len, NULL, 0);
	107	T_QUIET;
	108	T_ASSERT_POSIX_SUCCESS(err, "get sysctlbyname(kern.memorystatus_vm_map_fork_pidwatch...) failed");
	109
	110	return value;
	111	}
	112
	113	/*
	114	* We want to avoid jetsam giving us bad results, if possible. So check if there's
	115	* enough memory for the test to run, waiting briefly for some to free up.
	116	*/
	117	static void
	118	wait_for_free_mem(int need_mb)
	119	{
	120	int64_t memsize;
	121	int memorystatus_level;
	122	size_t size;
	123	int64_t avail;
	124	int err;
	125	int try;
	126
	127	/*
	128	* get amount of memory in the machine
	129	*/
	130	size = sizeof(memsize);
	131	err = sysctlbyname("hw.memsize", &memsize, &size, NULL, 0);
	132	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "sysctlbyname(hw.memsize...) failed");
	133
	134	/*
	135	* Use a loop to briefly sleep and recheck if short on memory.
	136	*/
	137	try = 1;
	138	for (;;) {
	139	/*
	140	* memorystatus_level is a percentage of memory available. For example 20 means 1/5 of memory.
	141	* It currently doesn't exist on macOS but neither does jetsam, so pass the test there.
	142	*/
	143	size = sizeof(memorystatus_level);
	144	if (sysctlbyname("kern.memorystatus_level", &memorystatus_level, &size, NULL, 0) != 0) {
	145	return;
	146	}
	147	T_QUIET; T_ASSERT_LE(memorystatus_level, 100, "memorystatus_level too high");
	148	T_QUIET; T_ASSERT_GT(memorystatus_level, 0, "memorystatus_level negative");
	149
	150	/*
	151	* jetsam kicks in at memory status level of 15%, so subtract that much out of what's available.
	152	*/
	153	avail = MAX(0, (memsize * (memorystatus_level - 15)) / 100);
	154
	155	/*
	156	* We're good to go if there's more than enough available.
	157	*/
	158	if ((int64_t)need_mb * MEGABYTE < avail) {
	159	return;
	160	}
	161
	162	/*
	163	* issue a message to log and sleep briefly to see if we can get more memory
	164	*/
	165	if (try-- == 0) {
	166	break;
	167	}
	168	T_LOG("Need %d MB, only %d MB available. sleeping 5 seconds for more to free. memorystatus_level %d",
	169	need_mb, (int)(avail / MEGABYTE), memorystatus_level);
	170	sleep(5);
	171	}
	172	T_SKIP("Needed %d MB, but only %d MB available. Skipping test to avoid jetsam issues.",
	173	need_mb, (int)(avail / MEGABYTE));
	174	}
	175
	176
	177	/*
	178	* The main test calls this to spawn child process which will run and
	179	* exceed some memory limit. The child is initially suspended so that
	180	* we can do the sysctl calls before it runs.
	181	* Since this is a libdarwintest, the "-n" names the T_HELPER_DECL() that
	182	* we want to run. The arguments specific to the test follow a "--".
	183	*/
	184	static pid_t
	185	spawn_child_process(
	186	char * const executable,
	187	char * const memlimit,
	188	short flags,
	189	int priority,
	190	int active_limit_mb,
	191	int inactive_limit_mb)
	192	{
	193	posix_spawnattr_t spawn_attrs;
	194	int err;
	195	pid_t child_pid;
	196	char * const argv_child[] = { executable, "-n", "child_process", "--", memlimit, NULL };
	197
	198	err = posix_spawnattr_init(&spawn_attrs);
	199	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " posix_spawnattr_init() failed");
	200
	201	err = posix_spawnattr_setflags(&spawn_attrs, POSIX_SPAWN_START_SUSPENDED);
	202	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " posix_spawnattr_setflags() failed");
	203
	204	err = posix_spawnattr_setjetsam_ext(&spawn_attrs, flags, priority, active_limit_mb, inactive_limit_mb);
	205	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " posix_spawnattr_setjetsam_ext() failed");
	206
	207	err = posix_spawn(&child_pid, executable, NULL, &spawn_attrs, argv_child, environ);
	208	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " posix_spawn() failed");
	209
	210	return child_pid;
	211	}
	212
	213
	214	/*
	215	* The parent calls this to continue the suspended child, then wait for its result.
	216	* We collect its resource usage to vefiry the expected amount allocated.
	217	*/
	218	static void
	219	test_child_process(pid_t child_pid, int status, struct rusage ru)
	220	{
	221	int err = 0;
	222	pid_t got_pid;
	223
	224	T_LOG(" continuing child[%d]\n", child_pid);
	225
	226	err = kill(child_pid, SIGCONT);
	227	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, " kill(%d, SIGCONT) failed", child_pid);
	228
	229	T_LOG(" waiting for child[%d] to exit", child_pid);
	230
	231	got_pid = wait4(child_pid, status, 0, ru);
	232	T_QUIET; T_ASSERT_EQ(child_pid, got_pid, " wait4(%d, ...) returned %d", child_pid, got_pid);
	233	}
	234
	235	/*
	236	* The child process executes this code. The easiest way, with given darwintest infrastructure,
	237	* it has to return information is via exit status.
	238	*/
	239	T_HELPER_DECL(child_process, "child allocates memory to failure")
	240	{
	241	#define BYTESPERALLOC MEGABYTE
	242	#define BYTESINEXCESS (2 * MEGABYTE) /* 2 MB - arbitrary */
	243	char *limit;
	244	long limit_mb = 0;
	245	long max_bytes_to_munch, bytes_remaining, bytes_this_munch;
	246	void *mem = NULL;
	247
	248	/*
	249	* This helper is run in a child process. The helper sees one argument
	250	* as a string which is the amount of memory in megabytes to allocate.
	251	*/
	252	if (argc != 1) {
	253	exit(NO_MEMSIZE_ARG);
	254	}
	255
	256	limit = argv[0];
	257	errno = 0;
	258	limit_mb = strtol(limit, NULL, 10);
	259	if (errno != 0 \|\| limit_mb <= 0) {
	260	exit(INVALID_MEMSIZE);
	261	}
	262
	263	/* Compute in excess of assigned limit */
	264	max_bytes_to_munch = limit_mb * MEGABYTE;
	265	max_bytes_to_munch += BYTESINEXCESS;
	266
	267	for (bytes_remaining = max_bytes_to_munch; bytes_remaining > 0; bytes_remaining -= bytes_this_munch) {
	268	bytes_this_munch = MIN(bytes_remaining, BYTESPERALLOC);
	269
	270	mem = malloc((size_t)bytes_this_munch);
	271	if (mem == NULL) {
	272	exit(MALLOC_FAILED);
	273	}
	274	arc4random_buf(mem, (size_t)bytes_this_munch);
	275	}
	276
	277	/* We chewed up all the memory we were asked to. */
	278	exit(NORMAL_EXIT);
	279	}
	280
	281
	282	/*
	283	* Actual test body.
	284	*/
	285	static void
	286	memorystatus_vm_map_fork_parent(int test_variant)
	287	{
	288	int max_task_pmem = 0; /* MB */
	289	size_t size = 0;
	290	int active_limit_mb = 0;
	291	int inactive_limit_mb = 0;
	292	short flags = 0;
	293	char memlimit_str[16];
	294	pid_t child_pid;
	295	int child_status;
	296	uint64_t kernel_pidwatch_val;
	297	uint64_t expected_pidwatch_val;
	298	int ret;
	299	struct rusage ru;
	300	enum child_exits exit_val;
	301
	302	/*
	303	* The code to set/get the pidwatch sysctl is only in
	304	* development kernels. Skip the test if not on one.
	305	*/
	306	if (!is_development_kernel()) {
	307	T_SKIP("Can't test on release kernel");
	308	}
	309
	310	/*
	311	* Determine a memory limit based on system having one or not.
	312	*/
	313	size = sizeof(max_task_pmem);
	314	(void)sysctlbyname("kern.max_task_pmem", &max_task_pmem, &size, NULL, 0);
	315	if (max_task_pmem <= 0) {
	316	max_task_pmem = 0;
	317	}
	318
	319	if (test_variant == TEST_ALLOWED) {
	320	/*
	321	* Tell the child to allocate less than 1/4 the system wide limit.
	322	*/
	323	if (max_task_pmem / 4 - LIMIT_DELTA_MB <= 0) {
	324	active_limit_mb = LIMIT_DELTA_MB;
	325	} else {
	326	active_limit_mb = max_task_pmem / 4 - LIMIT_DELTA_MB;
	327	}
	328	expected_pidwatch_val = MEMORYSTATUS_VM_MAP_FORK_ALLOWED;
	329	} else { /* TEST_NOT_ALLOWED */
	330	/*
	331	* Tell the child to allocate more than 1/4 the system wide limit.
	332	*/
	333	active_limit_mb = (max_task_pmem / 4) + LIMIT_DELTA_MB;
	334	if (max_task_pmem == 0) {
	335	expected_pidwatch_val = MEMORYSTATUS_VM_MAP_FORK_ALLOWED;
	336	} else {
	337	expected_pidwatch_val = MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED;
	338	}
	339	}
	340	inactive_limit_mb = active_limit_mb;
	341	T_LOG("using limit of %d Meg", active_limit_mb);
	342
	343	/*
	344	* When run as part of a larger suite, a previous test
	345	* may have left the system temporarily with too little
	346	* memory to run this test. We try to detect if there is
	347	* enough free memory to proceed, waiting a little bit
	348	* for memory to free up.
	349	*/
	350	wait_for_free_mem(active_limit_mb);
	351
	352	#if TARGET_OS_OSX
	353	/*
	354	* vm_map_fork() is always allowed on desktop.
	355	*/
	356	expected_pidwatch_val = MEMORYSTATUS_VM_MAP_FORK_ALLOWED;
	357	#endif
	358
	359	/*
	360	* Prepare the arguments needed to spawn the child process.
	361	*/
	362	memset(memlimit_str, 0, sizeof(memlimit_str));
	363	(void)sprintf(memlimit_str, "%d", active_limit_mb);
	364
	365	ret = _NSGetExecutablePath(testpath, &testpath_size);
	366	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "_NSGetExecutablePath(%s, ...)", testpath);
	367
	368	/*
	369	* We put the child process in FOREGROUND to try and keep jetsam's hands off it.
	370	*/
	371	child_pid = spawn_child_process(testpath, memlimit_str, flags,
	372	JETSAM_PRIORITY_FOREGROUND, active_limit_mb, inactive_limit_mb);
	373
	374	expected_pidwatch_val \|= (uint64_t)child_pid;
	375
	376	/*
	377	* We only reach here if parent successfully spawned child process.
	378	*/
	379	T_LOG(" spawned child_pid[%d] with memlimit %s (%d)MB\n",
	380	child_pid, memlimit_str, active_limit_mb);
	381
	382	/*
	383	* Set the kernel's pidwatch to look for the child.
	384	*/
	385	(void)set_memorystatus_vm_map_fork_pidwatch((pid_t)0);
	386	(void)set_memorystatus_vm_map_fork_pidwatch(child_pid);
	387
	388	/*
	389	* Let the child run and wait for it to finish.
	390	*/
	391	test_child_process(child_pid, &child_status, &ru);
	392	T_LOG("Child exited with max_rss of %ld", ru.ru_maxrss);
	393
	394	/*
	395	* Retrieve the kernel's pidwatch value. This should now indicate
	396	* if the corpse was allowed or not.
	397	*/
	398	kernel_pidwatch_val = get_memorystatus_vm_map_fork_pidwatch();
	399	(void)set_memorystatus_vm_map_fork_pidwatch((pid_t)0);
	400
	401	/*
	402	* If the child died abnormally, the test is invalid.
	403	*/
	404	if (!WIFEXITED(child_status)) {
	405	if (WIFSIGNALED(child_status)) {
	406	/* jetsam kills a process with SIGKILL */
	407	if (WTERMSIG(child_status) == SIGKILL) {
	408	T_LOG("Child appears to have been a jetsam victim");
	409	}
	410	T_SKIP("Child terminated by signal %d test result invalid", WTERMSIG(child_status));
	411	}
	412	T_SKIP("child did not exit normally (status=%d) test result invalid", child_status);
	413	}
	414
	415	/*
	416	* We don't expect the child to exit for any other reason than success
	417	*/
	418	exit_val = (enum child_exits)WEXITSTATUS(child_status);
	419	T_QUIET; T_ASSERT_EQ(exit_val, NORMAL_EXIT, "child exit due to: %s",
	420	(0 < exit_val && exit_val < NUM_CHILD_EXIT) ? child_exit_why[exit_val] : "unknown");
	421
	422	/*
	423	* If the kernel aborted generating a corpse for other reasons, the test is invalid.
	424	*/
	425	if (kernel_pidwatch_val == -1ull) {
	426	T_SKIP("corpse generation was aborted by kernel");
	427	}
	428
	429	/*
	430	* We should always have made it through the vm_map_fork() checks in the kernel for this test.
	431	*/
	432	T_QUIET; T_ASSERT_NE_ULLONG(kernel_pidwatch_val, (uint64_t)child_pid, "child didn't trigger corpse generation");
	433
	434	T_EXPECT_EQ(kernel_pidwatch_val, expected_pidwatch_val, "kernel value 0x%llx - expected 0x%llx",
	435	kernel_pidwatch_val, expected_pidwatch_val);
	436	}
	437
	438	/*
	439	* The order of these 2 test functions is important. They will be executed by the test framwork in order.
	440	*
	441	* We test "not allowed first", then "allowed". If it were the other way around, the corpse from the "allowed"
	442	* test would likely cause memory pressure and jetsam would likely kill the "not allowed" test.
	443	*/
	444	T_DECL(memorystatus_vm_map_fork_test_not_allowed, "test that corpse generation was not allowed", T_META_ASROOT(true))
	445	{
	446	memorystatus_vm_map_fork_parent(TEST_NOT_ALLOWED);
	447	}
	448
	449	T_DECL(memorystatus_vm_map_fork_test_allowed, "test corpse generation allowed", T_META_ASROOT(true))
	450	{
	451	memorystatus_vm_map_fork_parent(TEST_ALLOWED);
	452	}