[apple/xnu.git] / tests / cpucount.c

/*
 * Test to validate that we can schedule threads on all hw.ncpu CPUs, as reported by _os_cpu_number
 *
 * <rdar://problem/29545645>
 *
 *  xcrun -sdk macosx.internal clang -o cpucount cpucount.c -ldarwintest -g -Weverything
 *  xcrun -sdk iphoneos.internal clang -arch arm64 -o cpucount-ios cpucount.c -ldarwintest -g -Weverything
 */

#include <darwintest.h>

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdalign.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <err.h>
#include <errno.h>
#include <sysexits.h>
#include <sys/sysctl.h>
#include <stdatomic.h>

#include <mach/mach.h>
#include <mach/mach_time.h>

#include <os/tsd.h> /* private header for _os_cpu_number */

/* file-wide darwintest metadata: mark the tests here as runnable concurrently */
T_GLOBAL_META(T_META_RUN_CONCURRENTLY(true));

/*
 * const variables aren't constants, but enums are
 *
 * max_threads sizes g_cpu_seen[] and bounds both the CPU numbers the worker
 * threads may record and the hw.ncpu value the test accepts.
 */
enum { max_threads = 40 };

/* 128-byte alignment for the shared atomics below */
#define CACHE_ALIGNED __attribute__((aligned(128)))

/* number of thread_fn workers that have been released and checked in */
static _Atomic CACHE_ALIGNED uint64_t g_ready_threads = 0;

/* g_cpu_seen[n] is set once any worker observes _os_cpu_number() == n */
static _Atomic CACHE_ALIGNED bool g_cpu_seen[max_threads];

/* set by a worker when it finishes searching; tells the spin_fn threads to stop */
static _Atomic CACHE_ALIGNED bool g_bail = false;

/* number of worker threads to create, and the number of CPUs expected to be seen */
static uint32_t g_threads; /* set by sysctl hw.ncpu */

static uint64_t g_spin_ms = 50; /* it takes ~50ms of spinning for CLPC to deign to give us all cores */

/*
 * sometimes pageout scan can eat all of CPU 0 long enough to fail the test,
 * so we run the test at RT priority
 *
 * create_thread() applies the time-constraint policy to threads created at
 * priority 97, which is this value.
 */
static uint32_t g_thread_pri = 97;

/*
 * add in some extra low-pri threads to convince the amp scheduler to use E-cores consistently
 * works around <rdar://problem/29636191>
 */
static uint32_t g_spin_threads = 2;
static uint32_t g_spin_threads_pri = 20;

/*
 * g_readysem: signalled by each thread towards the main thread (ready / done)
 * g_go_sem:   threads block on it; the main thread releases them with semaphore_signal_all
 */
static semaphore_t g_readysem, g_go_sem;

/* filled in by mach_timebase_info() at the start of the test; used by nanos_to_abs() */
static mach_timebase_info_data_t timebase_info;

/*
 * Convert a duration in nanoseconds into mach absolute time units using the
 * global timebase_info (scale by denom, then divide by numer).
 *
 * timebase_info is a zero-initialized static, so it must have been filled in
 * by mach_timebase_info() before this is called; otherwise numer is 0.
 */
static uint64_t
nanos_to_abs(uint64_t nanos)
{
	uint64_t scaled = nanos * timebase_info.denom;

	return scaled / timebase_info.numer;
}

static void
set_realtime(pthread_t thread)
{
	kern_return_t kr;
	thread_time_constraint_policy_data_t pol;

	mach_port_t target_thread = pthread_mach_thread_np(thread);
	T_QUIET; T_ASSERT_NOTNULL(target_thread, "pthread_mach_thread_np");

	/* 1s 100ms 10ms */
	pol.period      = (uint32_t)nanos_to_abs(1000000000);
	pol.constraint  = (uint32_t)nanos_to_abs(100000000);
	pol.computation = (uint32_t)nanos_to_abs(10000000);

	pol.preemptible = 0; /* Ignored by OS */
	kr = thread_policy_set(target_thread, THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &pol,
	    THREAD_TIME_CONSTRAINT_POLICY_COUNT);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "thread_policy_set(THREAD_TIME_CONSTRAINT_POLICY)");
}

/*
 * Create a detached pthread running `start_routine(NULL)` with the scheduling
 * priority `priority` requested through its attributes.
 *
 * Threads created at priority 97 (the value of g_thread_pri in this file) are
 * additionally given the time-constraint policy via set_realtime().
 *
 * Every pthread call is checked with a darwintest assertion.
 * Returns the handle of the new thread.
 */
static pthread_t
create_thread(void *(*start_routine)(void *), uint32_t priority)
{
	pthread_attr_t thread_attr;
	pthread_t tid;
	int err;

	err = pthread_attr_init(&thread_attr);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "pthread_attr_init");

	err = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_DETACHED);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "pthread_attr_setdetachstate");

	struct sched_param sched = { .sched_priority = (int)priority };

	err = pthread_attr_setschedparam(&thread_attr, &sched);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "pthread_attr_setschedparam");

	err = pthread_create(&tid, &thread_attr, start_routine, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "pthread_create");

	/* the policy is applied after creation, to the already-started thread */
	if (priority == 97) {
		set_realtime(tid);
	}

	err = pthread_attr_destroy(&thread_attr);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "pthread_attr_destroy");

	return tid;
}

/*
 * Body of the high-priority worker threads (one is created per hw.ncpu CPU).
 *
 *  1. Signals g_readysem and blocks on g_go_sem until the main thread
 *     releases all threads.
 *  2. Increments g_ready_threads and spins until all g_threads workers have
 *     checked in, asserting that this did not run past g_spin_ms.
 *  3. For up to another g_spin_ms, records the CPU it is running on in
 *     g_cpu_seen[], leaving early once every index below g_threads is set.
 *  4. Sets g_bail so the spin_fn threads stop, prints the last CPU number it
 *     observed, signals g_readysem again and blocks on g_go_sem.
 *
 * The argument is unused; the return value is always NULL.
 */
static void *
thread_fn(__unused void *arg)
{
	T_QUIET; T_EXPECT_TRUE(true, "initialize darwintest on this thread");

	kern_return_t kr;

	/* tell the main thread we exist, then wait for the go signal */
	kr = semaphore_wait_signal(g_go_sem, g_readysem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

	/* atomic inc to say hello */
	g_ready_threads++;

	uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();

	/*
	 * spin to force the other threads to spread out across the cores
	 * may take some time if cores are masked and CLPC needs to warm up to unmask them
	 */
	while (g_ready_threads < g_threads && mach_absolute_time() < timeout) {
		;
	}

	T_QUIET; T_ASSERT_GE(timeout, mach_absolute_time(), "waiting for all threads took too long");

	timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();

	uint32_t iteration = 0;
	uint32_t cpunum = 0;

	/* search for new CPUs for the duration */
	while (mach_absolute_time() < timeout) {
		cpunum = _os_cpu_number();

		/* g_cpu_seen[] only has max_threads slots */
		assert(cpunum < max_threads);

		g_cpu_seen[cpunum] = true;

		/*
		 * Only rescan the shared table once every 10000 iterations
		 * (including the first), so the loop mostly samples the CPU number
		 * rather than reading every other thread's slot each time around.
		 */
		if (iteration++ % 10000 == 0) {
			uint32_t cpus_seen = 0;

			for (uint32_t i = 0; i < g_threads; i++) {
				if (g_cpu_seen[i]) {
					cpus_seen++;
				}
			}

			/* bail out early if we saw all CPUs */
			if (cpus_seen == g_threads) {
				break;
			}
		}
	}

	/* let the low-priority spinners stop churning */
	g_bail = true;

	printf("thread cpunum: %u\n", cpunum);

	/* report completion; nothing visible here signals g_go_sem again, so the thread parks */
	kr = semaphore_wait_signal(g_go_sem, g_readysem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

	return NULL;
}

/*
 * Body of the low-priority helper threads.
 *
 * After the same ready/go handshake as the workers, the thread alternates a
 * 500us sleep with a 1ms busy-wait until either twice g_spin_ms has elapsed or
 * a worker has set g_bail. It then signals g_readysem and blocks on g_go_sem.
 *
 * The argument is unused; the return value is always NULL.
 */
static void *
spin_fn(__unused void *arg)
{
	T_QUIET; T_EXPECT_TRUE(true, "initialize darwintest on this thread");

	kern_return_t kr = semaphore_wait_signal(g_go_sem, g_readysem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

	uint64_t deadline = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC * 2) + mach_absolute_time();

	/*
	 * run and sleep a bit to force some scheduler churn to get all the cores active
	 * needed to work around bugs in the amp scheduler
	 */
	while (mach_absolute_time() < deadline && !g_bail) {
		usleep(500);

		uint64_t burst_end = nanos_to_abs(1 * NSEC_PER_MSEC) + mach_absolute_time();

		/* busy-wait for the burst, giving up at once if a worker has finished */
		while (mach_absolute_time() < burst_end && !g_bail) {
			continue;
		}
	}

	kr = semaphore_wait_signal(g_go_sem, g_readysem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

	return NULL;
}


/*
 * count_cpus: create one realtime-priority worker per hw.ncpu CPU plus a few
 * low-priority spinners, release them together, and check that the workers
 * collectively observed every CPU number in [0, hw.ncpu) via _os_cpu_number().
 *
 * The test is declared with T_META_ENABLED(false) and T_META_CHECK_LEAKS(false).
 */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgnu-flexible-array-initializer"
T_DECL(count_cpus, "Tests we can schedule threads on all hw.ncpus cores according to _os_cpu_number",
    T_META_CHECK_LEAKS(false), T_META_ENABLED(false))
#pragma clang diagnostic pop
{
	/* unbuffered output so prints from different threads appear immediately */
	setvbuf(stdout, NULL, _IONBF, 0);
	setvbuf(stderr, NULL, _IONBF, 0);

	int rv;
	kern_return_t kr;

	/* nanos_to_abs() depends on timebase_info, so fill it in first */
	kr = mach_timebase_info(&timebase_info);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");

	kr = semaphore_create(mach_task_self(), &g_readysem, SYNC_POLICY_FIFO, 0);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");

	kr = semaphore_create(mach_task_self(), &g_go_sem, SYNC_POLICY_FIFO, 0);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");

	size_t ncpu_size = sizeof(g_threads);
	rv = sysctlbyname("hw.ncpu", &g_threads, &ncpu_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "sysctlbyname(hw.ncpu)");

	printf("hw.ncpu: %2u\n", g_threads);

	/* g_cpu_seen[] has max_threads slots */
	assert(g_threads < max_threads);

	for (uint32_t i = 0; i < g_threads; i++) {
		create_thread(&thread_fn, g_thread_pri);
	}

	for (uint32_t i = 0; i < g_spin_threads; i++) {
		create_thread(&spin_fn, g_spin_threads_pri);
	}

	/* wait until every thread has signalled that it is parked on g_go_sem */
	for (uint32_t i = 0; i < g_threads + g_spin_threads; i++) {
		kr = semaphore_wait(g_readysem);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");
	}

	uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();

	/* spin to warm up CLPC :) */
	while (mach_absolute_time() < timeout) {
		;
	}

	/* release all threads at once */
	kr = semaphore_signal_all(g_go_sem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal_all");

	/* wait for every thread to report that it has finished */
	for (uint32_t i = 0; i < g_threads + g_spin_threads; i++) {
		kr = semaphore_wait(g_readysem);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");
	}

	uint32_t cpus_seen = 0;

	/* tally and print which CPU numbers were observed */
	for (uint32_t i = 0; i < g_threads; i++) {
		if (g_cpu_seen[i]) {
			cpus_seen++;
		}

		printf("cpu %2u: %d\n", i, g_cpu_seen[i]);
	}

	T_ASSERT_EQ(cpus_seen, g_threads, "test should have run threads on all CPUS");
}
Commit	Line	Data
5ba3f43e A	1	/*
	2	* Test to validate that we can schedule threads on all hw.ncpus cores according to _os_cpu_number
	3	*
	4	* <rdar://problem/29545645>
	5	*
0a7de745 A	6	* xcrun -sdk macosx.internal clang -o cpucount cpucount.c -ldarwintest -g -Weverything
0a7de745 A	7	* xcrun -sdk iphoneos.internal clang -arch arm64 -o cpucount-ios cpucount.c -ldarwintest -g -Weverything
5ba3f43e A	8	*/
	9
	10	#include <darwintest.h>
	11
	12	#include <stdio.h>
	13	#include <stdlib.h>
	14	#include <stdbool.h>
	15	#include <stdalign.h>
	16	#include <unistd.h>
	17	#include <assert.h>
	18	#include <pthread.h>
	19	#include <err.h>
	20	#include <errno.h>
	21	#include <sysexits.h>
	22	#include <sys/sysctl.h>
	23	#include <stdatomic.h>
	24
	25	#include <mach/mach.h>
	26	#include <mach/mach_time.h>
	27
	28	#include <os/tsd.h> /* private header for _os_cpu_number */
	29
cb323159 A	30	T_GLOBAL_META(T_META_RUN_CONCURRENTLY(true));
cb323159 A	31
5ba3f43e A	32	/* const variables aren't constants, but enums are */
	33	enum { max_threads = 40 };
	34
	35	#define CACHE_ALIGNED __attribute__((aligned(128)))
	36
	37	static _Atomic CACHE_ALIGNED uint64_t g_ready_threads = 0;
	38
	39	static _Atomic CACHE_ALIGNED bool g_cpu_seen[max_threads];
	40
	41	static _Atomic CACHE_ALIGNED bool g_bail = false;
	42
	43	static uint32_t g_threads; /* set by sysctl hw.ncpu */
	44
	45	static uint64_t g_spin_ms = 50; /* it takes ~50ms of spinning for CLPC to deign to give us all cores */
	46
	47	/*
	48	* sometimes pageout scan can eat all of CPU 0 long enough to fail the test,
	49	* so we run the test at RT priority
	50	*/
	51	static uint32_t g_thread_pri = 97;
	52
	53	/*
	54	* add in some extra low-pri threads to convince the amp scheduler to use E-cores consistently
	55	* works around <rdar://problem/29636191>
	56	*/
	57	static uint32_t g_spin_threads = 2;
	58	static uint32_t g_spin_threads_pri = 20;
	59
	60	static semaphore_t g_readysem, g_go_sem;
	61
	62	static mach_timebase_info_data_t timebase_info;
	63
0a7de745 A	64	static uint64_t
	65	nanos_to_abs(uint64_t nanos)
	66	{
	67	return nanos * timebase_info.denom / timebase_info.numer;
	68	}
5ba3f43e	69
0a7de745 A	70	static void
	71	set_realtime(pthread_t thread)
	72	{
5ba3f43e A	73	kern_return_t kr;
	74	thread_time_constraint_policy_data_t pol;
	75
	76	mach_port_t target_thread = pthread_mach_thread_np(thread);
	77	T_QUIET; T_ASSERT_NOTNULL(target_thread, "pthread_mach_thread_np");
	78
	79	/* 1s 100ms 10ms */
	80	pol.period = (uint32_t)nanos_to_abs(1000000000);
	81	pol.constraint = (uint32_t)nanos_to_abs(100000000);
	82	pol.computation = (uint32_t)nanos_to_abs(10000000);
	83
	84	pol.preemptible = 0; /* Ignored by OS */
	85	kr = thread_policy_set(target_thread, THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &pol,
0a7de745	86	THREAD_TIME_CONSTRAINT_POLICY_COUNT);
5ba3f43e A	87	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "thread_policy_set(THREAD_TIME_CONSTRAINT_POLICY)");
	88	}
	89
	90	static pthread_t
	91	create_thread(void (start_routine)(void *), uint32_t priority)
	92	{
	93	int rv;
	94	pthread_t new_thread;
	95	pthread_attr_t attr;
	96
	97	struct sched_param param = { .sched_priority = (int)priority };
	98
	99	rv = pthread_attr_init(&attr);
	100	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_init");
	101
	102	rv = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	103	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_setdetachstate");
	104
	105	rv = pthread_attr_setschedparam(&attr, &param);
	106	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_setschedparam");
	107
	108	rv = pthread_create(&new_thread, &attr, start_routine, NULL);
	109	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_create");
	110
0a7de745	111	if (priority == 97) {
5ba3f43e	112	set_realtime(new_thread);
0a7de745	113	}
5ba3f43e A	114
	115	rv = pthread_attr_destroy(&attr);
	116	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_destroy");
	117
	118	return new_thread;
	119	}
	120
	121	static void *
	122	thread_fn(__unused void *arg)
	123	{
	124	T_QUIET; T_EXPECT_TRUE(true, "initialize darwintest on this thread");
	125
	126	kern_return_t kr;
	127
	128	kr = semaphore_wait_signal(g_go_sem, g_readysem);
	129	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
	130
	131	/* atomic inc to say hello */
	132	g_ready_threads++;
	133
	134	uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();
	135
	136	/*
	137	* spin to force the other threads to spread out across the cores
	138	* may take some time if cores are masked and CLPC needs to warm up to unmask them
	139	*/
0a7de745 A	140	while (g_ready_threads < g_threads && mach_absolute_time() < timeout) {
	141	;
	142	}
5ba3f43e A	143
	144	T_QUIET; T_ASSERT_GE(timeout, mach_absolute_time(), "waiting for all threads took too long");
	145
	146	timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();
	147
	148	int iteration = 0;
	149	uint32_t cpunum = 0;
	150
	151	/* search for new CPUs for the duration */
	152	while (mach_absolute_time() < timeout) {
	153	cpunum = _os_cpu_number();
	154
	155	assert(cpunum < max_threads);
	156
	157	g_cpu_seen[cpunum] = true;
	158
	159	if (iteration++ % 10000) {
	160	uint32_t cpus_seen = 0;
	161
0a7de745 A	162	for (uint32_t i = 0; i < g_threads; i++) {
0a7de745 A	163	if (g_cpu_seen[i]) {
5ba3f43e	164	cpus_seen++;
0a7de745	165	}
5ba3f43e A	166	}
	167
	168	/* bail out early if we saw all CPUs */
0a7de745	169	if (cpus_seen == g_threads) {
5ba3f43e	170	break;
0a7de745	171	}
5ba3f43e A	172	}
	173	}
	174
	175	g_bail = true;
	176
	177	printf("thread cpunum: %d\n", cpunum);
	178
	179	kr = semaphore_wait_signal(g_go_sem, g_readysem);
	180	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
	181
	182	return NULL;
	183	}
	184
	185	static void *
	186	spin_fn(__unused void *arg)
	187	{
	188	T_QUIET; T_EXPECT_TRUE(true, "initialize darwintest on this thread");
	189
	190	kern_return_t kr;
	191
	192	kr = semaphore_wait_signal(g_go_sem, g_readysem);
	193	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
	194
	195	uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC * 2) + mach_absolute_time();
	196
	197	/*
	198	* run and sleep a bit to force some scheduler churn to get all the cores active
	199	* needed to work around bugs in the amp scheduler
	200	*/
	201	while (mach_absolute_time() < timeout && g_bail == false) {
	202	usleep(500);
	203
	204	uint64_t inner_timeout = nanos_to_abs(1 * NSEC_PER_MSEC) + mach_absolute_time();
	205
0a7de745 A	206	while (mach_absolute_time() < inner_timeout && g_bail == false) {
	207	;
	208	}
5ba3f43e A	209	}
	210
	211	kr = semaphore_wait_signal(g_go_sem, g_readysem);
	212	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");
	213
	214	return NULL;
	215	}
	216
	217
	218	#pragma clang diagnostic push
	219	#pragma clang diagnostic ignored "-Wgnu-flexible-array-initializer"
	220	T_DECL(count_cpus, "Tests we can schedule threads on all hw.ncpus cores according to _os_cpu_number",
0a7de745	221	T_META_CHECK_LEAKS(false), T_META_ENABLED(false))
5ba3f43e A	222	#pragma clang diagnostic pop
	223	{
	224	setvbuf(stdout, NULL, _IONBF, 0);
	225	setvbuf(stderr, NULL, _IONBF, 0);
	226
	227	int rv;
	228	kern_return_t kr;
	229	kr = mach_timebase_info(&timebase_info);
	230	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
	231
	232	kr = semaphore_create(mach_task_self(), &g_readysem, SYNC_POLICY_FIFO, 0);
	233	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
	234
	235	kr = semaphore_create(mach_task_self(), &g_go_sem, SYNC_POLICY_FIFO, 0);
	236	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
	237
	238	size_t ncpu_size = sizeof(g_threads);
	239	rv = sysctlbyname("hw.ncpu", &g_threads, &ncpu_size, NULL, 0);
	240	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "sysctlbyname(hw.ncpu)");
	241
	242	printf("hw.ncpu: %2d\n", g_threads);
	243
	244	assert(g_threads < max_threads);
	245
0a7de745	246	for (uint32_t i = 0; i < g_threads; i++) {
5ba3f43e	247	create_thread(&thread_fn, g_thread_pri);
0a7de745	248	}
5ba3f43e	249
0a7de745	250	for (uint32_t i = 0; i < g_spin_threads; i++) {
5ba3f43e	251	create_thread(&spin_fn, g_spin_threads_pri);
0a7de745	252	}
5ba3f43e	253
0a7de745	254	for (uint32_t i = 0; i < g_threads + g_spin_threads; i++) {
5ba3f43e A	255	kr = semaphore_wait(g_readysem);
	256	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");
	257	}
	258
	259	uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();
	260
	261	/* spin to warm up CLPC :) */
0a7de745 A	262	while (mach_absolute_time() < timeout) {
	263	;
	264	}
5ba3f43e A	265
	266	kr = semaphore_signal_all(g_go_sem);
	267	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal_all");
	268
0a7de745	269	for (uint32_t i = 0; i < g_threads + g_spin_threads; i++) {
5ba3f43e A	270	kr = semaphore_wait(g_readysem);
	271	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");
	272	}
	273
	274	uint32_t cpus_seen = 0;
	275
0a7de745 A	276	for (uint32_t i = 0; i < g_threads; i++) {
0a7de745 A	277	if (g_cpu_seen[i]) {
5ba3f43e	278	cpus_seen++;
0a7de745	279	}
5ba3f43e A	280
	281	printf("cpu %2d: %d\n", i, g_cpu_seen[i]);
	282	}
	283
	284	T_ASSERT_EQ(cpus_seen, g_threads, "test should have run threads on all CPUS");
	285	}