[apple/xnu.git] / tools / tests / darwintests / ioperf.c

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <pthread.h>
#include <errno.h>
#include <err.h>
#include <string.h>
#include <assert.h>
#include <sysexits.h>
#include <getopt.h>
#include <spawn.h>
#include <stdbool.h>
#include <sys/sysctl.h>
#include <mach/mach_time.h>
#include <mach/mach.h>
#include <mach/semaphore.h>
#include <TargetConditionals.h>

#ifdef T_NAMESPACE
#undef T_NAMESPACE
#endif

#include <darwintest.h>
#include <stdatomic.h>

/* Capacity of the threads[] table that holds the spinner thread handles. */
#define MAX_THREADS         32
/* Upper bound, in seconds, on how long each spinner thread busy-waits. */
#define SPIN_SECS           6
/* Scheduling priority requested for the spinner threads. */
#define THR_SPINNER_PRI     63
/* Scheduling priority given to the main (I/O issuing) thread. */
#define THR_MANAGER_PRI     62
/* Number of unmeasured reads issued before timing starts in perform_io(). */
#define WARMUP_ITERATIONS   100
/* Size in bytes of the test file written by io_perf_test_io_init(). */
#define FILE_SIZE           (16384 * 4096)
/* Size in bytes of each read; the data buffer is 16 times this size. */
#define IO_SIZE             4096
/* Number of timed reads performed by each perform_io() call. */
#define IO_COUNT            2500

/* Timebase ratio filled in by mach_timebase_info(); read by nanos_to_abs(). */
static mach_timebase_info_data_t timebase_info;
/* Spinner threads wait on this; the main thread releases them all at once. */
static semaphore_t semaphore;
/* Signalled once by each spinner thread; the main thread waits on it once per spinner. */
static semaphore_t worker_sem;
/* Value of the hw.ncpu sysctl, read in the test body. */
static uint32_t g_numcpus;
/* Non-zero while spinner threads should keep spinning; cleared by the main thread. */
static _Atomic uint32_t keep_going = 1;
/*
 * NOTE(review): not referenced by the code visible in this file; perform_io()
 * declares a local with the same name that shadows it.
 */
int test_file_fd = 0;
/* IO_SIZE * 16 byte buffer: source for writing the test file, destination for reads. */
char *data_buf = NULL;
/* Process environment, handed to posix_spawn() for the mount script. */
extern char **environ;

/* Handles of the created threads, indexed by thread id. */
static struct {
    pthread_t thread;
} threads[MAX_THREADS];

/*
 * Convert a duration in nanoseconds into mach absolute-time units using the
 * file-level timebase_info. timebase_info must already have been filled in
 * (a zero numer would divide by zero).
 */
static uint64_t
nanos_to_abs(uint64_t nanos)
{
    const uint64_t scaled = nanos * timebase_info.denom;

    return scaled / timebase_info.numer;
}

/*
 * One-time setup for the read-performance test.
 *
 * Runs /usr/local/sbin/mount_nand.sh and skips the test if the script cannot
 * be spawned, cannot be waited for, or does not exit with status 0. Then:
 *   - puts the calling thread at SCHED_FIFO priority THR_MANAGER_PRI,
 *   - sets the process disk I/O policy to IOPOL_IMPORTANT,
 *   - allocates data_buf (IO_SIZE * 16 bytes) and fills it from /dev/urandom,
 *   - writes FILE_SIZE bytes of that data to /mnt2/test with F_NOCACHE set,
 *     then closes the file and calls sync().
 */
static void
io_perf_test_io_init(void)
{
    pid_t pid;
    int wait_status = 0;
    char *const mount_args[] = {"/usr/local/sbin/mount_nand.sh", NULL};

    /*
     * posix_spawn() reports failure by returning an error number (a positive
     * value), never -1, so any non-zero result means no child was created.
     */
    int spawn_ret = posix_spawn(&pid, mount_args[0], NULL, NULL, mount_args, environ);
    if (spawn_ret != 0) {
        T_SKIP("NAND mounting in LTE not possible on this device. Skipping test!");
    }

    /* Only trust the exit status if waitpid() actually reaped our child. */
    pid_t reaped = waitpid(pid, &wait_status, 0);
    if (reaped == pid && WIFEXITED(wait_status) && WEXITSTATUS(wait_status) == 0) {
        T_PASS("NAND mounted successfully");
    } else {
        T_SKIP("Unable to mount NAND. Skipping test!");
    }

    /* Mark the main thread as fixed priority */
    struct sched_param param = {.sched_priority = THR_MANAGER_PRI};
    T_ASSERT_POSIX_ZERO(pthread_setschedparam(pthread_self(), SCHED_FIFO, &param),
            "pthread_setschedparam");

    /* Set I/O Policy to Tier 0 */
    T_ASSERT_POSIX_ZERO(setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS,
            IOPOL_IMPORTANT), "setiopolicy");

    /* Create data buffer */
    data_buf = malloc(IO_SIZE * 16);
    T_ASSERT_NOTNULL(data_buf, "Data buffer allocation");

    /* Fill the buffer with random bytes (no mode argument: O_CREAT is not used). */
    int rndfd = open("/dev/urandom", O_RDONLY);
    T_ASSERT_POSIX_SUCCESS(rndfd, "Open /dev/urandom");
    T_ASSERT_GE_INT((int)read(rndfd, data_buf, IO_SIZE * 16), 0, "read /dev/urandom");
    close(rndfd);

    /* Create test file */
    int fd = open("/mnt2/test", O_CREAT | O_WRONLY, S_IRUSR);
    T_ASSERT_POSIX_SUCCESS(fd, "Open /mnt2/test for writing!");

    T_ASSERT_POSIX_ZERO(fcntl(fd, F_NOCACHE, 1), "fcntl F_NOCACHE enable");

    /*
     * Write the buffer repeatedly until FILE_SIZE bytes have been requested.
     * NOTE(review): a short write still advances `size` by a full chunk, so
     * the file could end up smaller than FILE_SIZE; only errors are caught.
     */
    for (int size = 0; size < FILE_SIZE; size += (IO_SIZE * 16)) {
        T_QUIET;
        T_ASSERT_GE_INT((int)write(fd, data_buf, IO_SIZE * 16), 0, "write test file");
    }
    close(fd);
    sync();
}

static pthread_t
create_thread(uint32_t thread_id, uint32_t priority, bool fixpri, 
        void *(*start_routine)(void *))
{
    int rv;
    pthread_t new_thread;
    struct sched_param param = { .sched_priority = (int)priority };
    pthread_attr_t attr;

    T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), "pthread_attr_init");

    T_ASSERT_POSIX_ZERO(pthread_attr_setschedparam(&attr, &param),
            "pthread_attr_setschedparam");

    if (fixpri) {
        T_ASSERT_POSIX_ZERO(pthread_attr_setschedpolicy(&attr, SCHED_RR),
                "pthread_attr_setschedpolicy");
    }

    T_ASSERT_POSIX_ZERO(pthread_create(&new_thread, &attr, start_routine,
            (void*)(uintptr_t)thread_id), "pthread_create");

    T_ASSERT_POSIX_ZERO(pthread_attr_destroy(&attr), "pthread_attr_destroy");

    threads[thread_id].thread = new_thread;

    return new_thread;
}

/*
 * Busy-wait until `seconds` seconds have elapsed on the mach absolute-time
 * clock, or until keep_going reads as zero, whichever comes first.
 */
static void
spin_for_duration(uint32_t seconds)
{
    const uint64_t span     = nanos_to_abs((uint64_t)seconds * NSEC_PER_SEC);
    const uint64_t deadline = mach_absolute_time() + span;
    uint64_t iterations = 0;

    for (;;) {
        /* Time check first, then the stop flag, as a short-circuit pair. */
        if (mach_absolute_time() >= deadline) {
            break;
        }
        if (!atomic_load_explicit(&keep_going, memory_order_relaxed)) {
            break;
        }
        iterations++;
    }
}

/*
 * Entry point for a spinner thread.
 *
 * arg carries the thread id packed into the pointer value. The thread names
 * itself "spin thread <id>", signals worker_sem while starting to wait on
 * `semaphore`, and once released busy-waits for up to SPIN_SECS seconds.
 * Always returns NULL.
 */
static void *
spin_thread(void *arg)
{
    /* Go through uintptr_t so the pointer-to-integer narrowing is explicit. */
    uint32_t thread_id = (uint32_t)(uintptr_t)arg;
    char name[30] = "";

    /* thread_id is unsigned, so format it with %u. */
    snprintf(name, sizeof(name), "spin thread %2u", thread_id);
    pthread_setname_np(name);

    /* Tell the main thread we are parked, then block until it releases us. */
    T_ASSERT_MACH_SUCCESS(semaphore_wait_signal(semaphore, worker_sem),
            "semaphore_wait_signal");
    spin_for_duration(SPIN_SECS);
    return NULL;
}

/*
 * Time IO_COUNT sequential reads of IO_SIZE bytes from /mnt2/test into `stat`.
 *
 * The file is opened read-only with F_NOCACHE enabled and F_RDAHEAD disabled.
 * WARMUP_ITERATIONS unmeasured reads are issued first. During the measured
 * phase a read that returns 0 rewinds the file to offset 0, and a negative
 * return reports "read failure" and ends the test.
 */
void
perform_io(dt_stat_time_t stat)
{
    /* Open the test data file */
    int fd = open("/mnt2/test", O_RDONLY);
    T_WITH_ERRNO;
    T_ASSERT_POSIX_SUCCESS(fd, "Open test data file");

    /* Disable caching and read-ahead for the file */
    T_ASSERT_POSIX_ZERO(fcntl(fd, F_NOCACHE, 1), "fcntl F_NOCACHE enable");
    T_ASSERT_POSIX_ZERO(fcntl(fd, F_RDAHEAD, 0), "fcntl F_RDAHEAD disable");

    /* Warmup: results are intentionally not inspected. */
    int warmups_left = WARMUP_ITERATIONS;
    while (warmups_left > 0) {
        read(fd, data_buf, IO_SIZE);
        warmups_left--;
    }

    /* Measured phase: only the read() itself sits inside the timed region. */
    for (uint32_t done = 0; done < IO_COUNT; done++) {
        int nread;

        T_STAT_MEASURE(stat) {
            nread = read(fd, data_buf, IO_SIZE);
        }

        if (nread < 0) {
            T_FAIL("read failure");
            T_END;
        } else if (nread == 0) {
            T_QUIET;
            T_ASSERT_POSIX_SUCCESS(lseek(fd, 0, SEEK_SET), "lseek begin");
        }
    }

    close(fd);
}

/* File-wide darwintest metadata: sets the test namespace to "xnu.io". */
T_GLOBAL_META(T_META_NAMESPACE("xnu.io"));

/*
 * read_perf: sequential uncached read latency, measured twice.
 *
 *   1. "CPU idle"   - perform_io() with no extra load.
 *   2. "Single CPU" - perform_io() while (hw.ncpu - 1) fixed-priority spinner
 *                     threads keep the other cores busy.
 *
 * Disable the test on MacOS for now.
 * NOTE(review): CONFIG_EMBEDDED is not defined in this file; presumably it is
 * supplied by the build system - confirm.
 */
T_DECL(read_perf, "Sequential Uncached Read Performance", T_META_TYPE_PERF, T_META_CHECK_LEAKS(NO), T_META_ASROOT(YES), T_META_LTEPHASE(LTE_POSTINIT))
{

#if !CONFIG_EMBEDDED
    T_SKIP("Not supported on MacOS");
#endif /* !CONFIG_EMBEDDED */

    io_perf_test_io_init();
    pthread_setname_np("main thread");

    T_ASSERT_MACH_SUCCESS(mach_timebase_info(&timebase_info), "mach_timebase_info");

    dt_stat_time_t seq_noload = dt_stat_time_create("sequential read latency (CPU idle)");
    perform_io(seq_noload);
    dt_stat_finalize(seq_noload);

    /*
     * We create spinner threads for this test so that all other cores are
     * busy. That way the I/O issue thread has to context switch to the
     * IOWorkLoop thread and back for the I/O.
     */
    T_ASSERT_MACH_SUCCESS(semaphore_create(mach_task_self(), &semaphore,
            SYNC_POLICY_FIFO, 0), "semaphore_create");

    T_ASSERT_MACH_SUCCESS(semaphore_create(mach_task_self(), &worker_sem,
            SYNC_POLICY_FIFO, 0), "semaphore_create");

    size_t ncpu_size = sizeof(g_numcpus);
    T_ASSERT_POSIX_SUCCESS(sysctlbyname("hw.ncpu", &g_numcpus, &ncpu_size, NULL, 0),
            "sysctlbyname(hw.ncpu)");

    /* g_numcpus is unsigned, so format it with %u. */
    T_LOG("hw.ncpu: %u\n", g_numcpus);

    /*
     * One spinner per CPU other than the one issuing I/O. Guard against the
     * unsigned underflow when hw.ncpu reads as 0, and never create more
     * threads than the threads[] table can hold.
     */
    uint32_t n_spinners = (g_numcpus > 0) ? g_numcpus - 1 : 0;
    if (n_spinners > MAX_THREADS) {
        T_LOG("capping spinner threads at %d", MAX_THREADS);
        n_spinners = MAX_THREADS;
    }

    for (uint32_t thread_id = 0; thread_id < n_spinners; thread_id++) {
        threads[thread_id].thread = create_thread(thread_id, THR_SPINNER_PRI,
                true, &spin_thread);
    }

    /* Each spinner signals worker_sem once it is parked on `semaphore`. */
    for (uint32_t thread_id = 0; thread_id < n_spinners; thread_id++) {
        T_ASSERT_MACH_SUCCESS(semaphore_wait(worker_sem), "semaphore_wait");
    }

    /* All spinners are parked: release them together. */
    T_ASSERT_MACH_SUCCESS(semaphore_signal_all(semaphore), "semaphore_signal");

    dt_stat_time_t seq_load = dt_stat_time_create("sequential read latency (Single CPU)");
    perform_io(seq_load);
    dt_stat_finalize(seq_load);

    /* Stop any spinner that has not yet hit its own timeout, then reap them. */
    atomic_store_explicit(&keep_going, 0, memory_order_relaxed);
    for (uint32_t thread_id = 0; thread_id < n_spinners; thread_id++) {
        T_ASSERT_POSIX_ZERO(pthread_join(threads[thread_id].thread, NULL),
                "pthread_join %u", thread_id);
    }
}
Commit	Line	Data
5ba3f43e A	1	#include <unistd.h>
	2	#include <stdio.h>
	3	#include <stdlib.h>
	4	#include <fcntl.h>
	5	#include <pthread.h>
	6	#include <errno.h>
	7	#include <err.h>
	8	#include <string.h>
	9	#include <assert.h>
	10	#include <sysexits.h>
	11	#include <getopt.h>
	12	#include <spawn.h>
	13	#include <stdbool.h>
	14	#include <sys/sysctl.h>
	15	#include <mach/mach_time.h>
	16	#include <mach/mach.h>
	17	#include <mach/semaphore.h>
	18	#include <TargetConditionals.h>
	19
	20	#ifdef T_NAMESPACE
	21	#undef T_NAMESPACE
	22	#endif
	23
	24	#include <darwintest.h>
	25	#include <stdatomic.h>
	26
	27	#define MAX_THREADS 32
	28	#define SPIN_SECS 6
	29	#define THR_SPINNER_PRI 63
	30	#define THR_MANAGER_PRI 62
	31	#define WARMUP_ITERATIONS 100
	32	#define FILE_SIZE (16384 * 4096)
	33	#define IO_SIZE 4096
	34	#define IO_COUNT 2500
	35
	36	static mach_timebase_info_data_t timebase_info;
	37	static semaphore_t semaphore;
	38	static semaphore_t worker_sem;
	39	static uint32_t g_numcpus;
	40	static _Atomic uint32_t keep_going = 1;
	41	int test_file_fd = 0;
	42	char *data_buf = NULL;
	43	extern char **environ;
	44
	45	static struct {
	46	pthread_t thread;
	47	} threads[MAX_THREADS];
	48
	49	static uint64_t
	50	nanos_to_abs(uint64_t nanos)
	51	{
	52	return nanos * timebase_info.denom / timebase_info.numer;
	53	}
	54
	55	static void
	56	io_perf_test_io_init(void)
	57	{
	58	int spawn_ret, pid;
	59	char *const mount_args[] = {"/usr/local/sbin/mount_nand.sh", NULL};
	60	spawn_ret = posix_spawn(&pid, mount_args[0], NULL, NULL, mount_args, environ);
	61	if (spawn_ret < 0) {
	62	T_SKIP("NAND mounting in LTE not possible on this device. Skipping test!");
	63	}
	64	waitpid(pid, &spawn_ret, 0);
65	if (WIFEXITED(spawn_ret) && !WEXITSTATUS(spawn_ret)) {
66	T_PASS("NAND mounted successfully");
67	} else {
68	T_SKIP("Unable to mount NAND. Skipping test!");
69	}
70
71	/* Mark the main thread as fixed priority */
72	struct sched_param param = {.sched_priority = THR_MANAGER_PRI};
73	T_ASSERT_POSIX_ZERO(pthread_setschedparam(pthread_self(), SCHED_FIFO, &param),
74	"pthread_setschedparam");
75
76	/* Set I/O Policy to Tier 0 */
77	T_ASSERT_POSIX_ZERO(setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS,
78	IOPOL_IMPORTANT), "setiopolicy");
79
80	/* Create data buffer */
81	data_buf = malloc(IO_SIZE * 16);
82	T_ASSERT_NOTNULL(data_buf, "Data buffer allocation");
83
84	int rndfd = open("/dev/urandom", O_RDONLY, S_IRUSR);
85	T_ASSERT_POSIX_SUCCESS(rndfd, "Open /dev/urandom");
86	T_ASSERT_GE_INT((int)read(rndfd, data_buf, IO_SIZE * 16), 0, "read /dev/urandom");
87	close(rndfd);
88
89	/* Create test file */
90	int fd = open("/mnt2/test", O_CREAT \| O_WRONLY, S_IRUSR);
91	T_ASSERT_POSIX_SUCCESS(fd, 0, "Open /mnt2/test for writing!");
92
93	T_ASSERT_POSIX_ZERO(fcntl(fd, F_NOCACHE, 1), "fcntl F_NOCACHE enable");
94	for (int size = 0; size < FILE_SIZE;) {
95	T_QUIET;
96	T_ASSERT_GE_INT((int)write(fd, data_buf, IO_SIZE * 16), 0, "write test file");
97	size += (IO_SIZE * 16);
98	}
99	close(fd);
100	sync();
101
102	}
103
104	static pthread_t
105	create_thread(uint32_t thread_id, uint32_t priority, bool fixpri,
106	void (start_routine)(void *))
107	{
108	int rv;
109	pthread_t new_thread;
110	struct sched_param param = { .sched_priority = (int)priority };
111	pthread_attr_t attr;
112
113	T_ASSERT_POSIX_ZERO(pthread_attr_init(&attr), "pthread_attr_init");
114
115	T_ASSERT_POSIX_ZERO(pthread_attr_setschedparam(&attr, &param),
116	"pthread_attr_setschedparam");
117
118	if (fixpri) {
119	T_ASSERT_POSIX_ZERO(pthread_attr_setschedpolicy(&attr, SCHED_RR),
120	"pthread_attr_setschedpolicy");
121	}
122
123	T_ASSERT_POSIX_ZERO(pthread_create(&new_thread, &attr, start_routine,
124	(void*)(uintptr_t)thread_id), "pthread_create");
125
126	T_ASSERT_POSIX_ZERO(pthread_attr_destroy(&attr), "pthread_attr_destroy");
127
128	threads[thread_id].thread = new_thread;
129
130	return new_thread;
131	}
132
133	/* Spin until a specified number of seconds elapses */
134	static void
135	spin_for_duration(uint32_t seconds)
136	{
137	uint64_t duration = nanos_to_abs((uint64_t)seconds * NSEC_PER_SEC);
138	uint64_t current_time = mach_absolute_time();
139	uint64_t timeout = duration + current_time;
140
141	uint64_t spin_count = 0;
142
143	while (mach_absolute_time() < timeout && atomic_load_explicit(&keep_going,
144	memory_order_relaxed)) {
145	spin_count++;
146	}
147	}
148
149	static void *
150	spin_thread(void *arg)
151	{
152	uint32_t thread_id = (uint32_t) arg;
153	char name[30] = "";
154
155	snprintf(name, sizeof(name), "spin thread %2d", thread_id);
156	pthread_setname_np(name);
157	T_ASSERT_MACH_SUCCESS(semaphore_wait_signal(semaphore, worker_sem),
158	"semaphore_wait_signal");
159	spin_for_duration(SPIN_SECS);
160	return NULL;
161	}
162
163	void
164	perform_io(dt_stat_time_t stat)
165	{
166	/* Open the test data file */
167	int test_file_fd = open("/mnt2/test", O_RDONLY);
168	T_WITH_ERRNO;
169	T_ASSERT_POSIX_SUCCESS(test_file_fd, "Open test data file");
170
171	/* Disable caching and read-ahead for the file */
172	T_ASSERT_POSIX_ZERO(fcntl(test_file_fd, F_NOCACHE, 1), "fcntl F_NOCACHE enable");
173	T_ASSERT_POSIX_ZERO(fcntl(test_file_fd, F_RDAHEAD, 0), "fcntl F_RDAHEAD disable");
174
175	uint32_t count = 0;
176	int ret;
177
178	for (int i=0; i < WARMUP_ITERATIONS; i++) {
179	/* Warmup loop */
180	read(test_file_fd, data_buf, IO_SIZE);
181	}
182
183	do {
184	T_STAT_MEASURE(stat) {
185	ret = read(test_file_fd, data_buf, IO_SIZE);
186	}
187	if (ret == 0) {
188	T_QUIET;
189	T_ASSERT_POSIX_SUCCESS(lseek(test_file_fd, 0, SEEK_SET), "lseek begin");
190	} else if (ret < 0) {
191	T_FAIL("read failure");
192	T_END;
193	}
194	count++;
195	} while(count < IO_COUNT);
196	close(test_file_fd);
197	}
198
199	T_GLOBAL_META(T_META_NAMESPACE("xnu.io"));
200
201	/* Disable the test on MacOS for now */
202	T_DECL(read_perf, "Sequential Uncached Read Performance", T_META_TYPE_PERF, T_META_CHECK_LEAKS(NO), T_META_ASROOT(YES), T_META_LTEPHASE(LTE_POSTINIT))
203	{
204
205	#if !CONFIG_EMBEDDED
206	T_SKIP("Not supported on MacOS");
207	#endif /* !CONFIG_EMBEDDED */
208
209	io_perf_test_io_init();
210	pthread_setname_np("main thread");
211
212	T_ASSERT_MACH_SUCCESS(mach_timebase_info(&timebase_info), "mach_timebase_info");
213
214	dt_stat_time_t seq_noload = dt_stat_time_create("sequential read latency (CPU idle)");
215	perform_io(seq_noload);
216	dt_stat_finalize(seq_noload);
217
218	/*
219	* We create spinner threads for this test so that all other cores are
220	* busy. That way the I/O issue thread has to context switch to the
221	* IOWorkLoop thread and back for the I/O.
222	*/
223	T_ASSERT_MACH_SUCCESS(semaphore_create(mach_task_self(), &semaphore,
224	SYNC_POLICY_FIFO, 0), "semaphore_create");
225
226	T_ASSERT_MACH_SUCCESS(semaphore_create(mach_task_self(), &worker_sem,
227	SYNC_POLICY_FIFO, 0), "semaphore_create");
228
229	size_t ncpu_size = sizeof(g_numcpus);
230	T_ASSERT_POSIX_SUCCESS(sysctlbyname("hw.ncpu", &g_numcpus, &ncpu_size, NULL, 0),
231	"sysctlbyname(hw.ncpu)");
232
233	T_LOG("hw.ncpu: %d\n", g_numcpus);
234	uint32_t n_spinners = g_numcpus - 1;
235
236	for (uint32_t thread_id = 0; thread_id < n_spinners; thread_id++) {
237	threads[thread_id].thread = create_thread(thread_id, THR_SPINNER_PRI,
238	true, &spin_thread);
239	}
240
241	for (uint32_t thread_id = 0; thread_id < n_spinners; thread_id++) {
242	T_ASSERT_MACH_SUCCESS(semaphore_wait(worker_sem), "semaphore_wait");
243	}
244
245	T_ASSERT_MACH_SUCCESS(semaphore_signal_all(semaphore), "semaphore_signal");
246
247	dt_stat_time_t seq_load = dt_stat_time_create("sequential read latency (Single CPU)");
248	perform_io(seq_load);
249	dt_stat_finalize(seq_load);
250
251	atomic_store_explicit(&keep_going, 0, memory_order_relaxed);
252	for (uint32_t thread_id = 0; thread_id < n_spinners; thread_id++) {
253	T_ASSERT_POSIX_ZERO(pthread_join(threads[thread_id].thread, NULL),
254	"pthread_join %d", thread_id);
255	}
256	}