/* apple/xnu: tests/perf_vmfault.c (blob db0613f961c868f98ad7886793dabbb913b002c8) */
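/*
 * VM fault throughput microbenchmark. Worker threads each touch a disjoint
 * chunk of a large allocation one page at a time; the wall-clock time for all
 * threads to finish is sampled with darwintest's dt_stat machinery. The fault
 * type (soft fault vs. zero fill) and the mapping layout (default, single
 * named-entry region, or one region per thread) vary across the T_DECL test
 * cases at the bottom of the file.
 */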
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <mach/mach.h>
#include <mach/vm_map.h>
#include <darwintest.h>
#include <TargetConditionals.h>
#include <perfcheck_keys.h>

T_GLOBAL_META(
    T_META_NAMESPACE("xnu.vm.perf"),
    T_META_CHECK_LEAKS(false),
    T_META_TAG_PERF
    );

#ifdef DT_IOSMARK
#define MEMSIZE (1UL << 29) /* 512 MB */
#else
#define MEMSIZE (1UL << 27) /* 128 MB */
#endif

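/* User VM tags applied to the per-thread mappings in the multiple-regions
 * variant; see the VM_MAKE_TAG() call in map_mem_regions_multiple(). */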
#define VM_TAG1 100
#define VM_TAG2 101

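/*
 * Fault types. SOFT_FAULT touches pages that are already resident (each
 * region is pre-faulted and then vm_remap()ed, so an access only needs a new
 * pmap entry); ZERO_FILL touches untouched anonymous pages, so each access
 * allocates and zeroes a fresh page.
 */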
enum {
    SOFT_FAULT,
    ZERO_FILL,
    NUM_FAULT_TYPES
};

enum {
    VARIANT_DEFAULT = 1,
    VARIANT_SINGLE_REGION,
    VARIANT_MULTIPLE_REGIONS,
    NUM_MAPPING_VARIANTS
};

static char *variant_str[] = {
    "none",
    "default",
    "single-region",
    "multiple-regions"
};

typedef struct {
    char *region_addr;
    char *shared_region_addr;
    size_t region_len;
} memregion_config;

static memregion_config *memregion_config_per_thread;

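/*
 * Shared test state. The condition variables implement a start/finish gate:
 * each worker bumps ready_thread_count and blocks on start_cvar; once every
 * worker is ready, the main thread broadcasts start_cvar, starts the timer,
 * and waits on threads_finished_cvar until the last worker bumps
 * finished_thread_count.
 */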
static size_t pgsize;
static int num_threads;
static int ready_thread_count;
static int finished_thread_count;
static dt_stat_time_t runtime;
static pthread_cond_t start_cvar;
static pthread_cond_t threads_ready_cvar;
static pthread_cond_t threads_finished_cvar;
static pthread_mutex_t ready_thread_count_lock;
static pthread_mutex_t finished_thread_count_lock;

static void map_mem_regions_default(int fault_type, size_t memsize);
static void map_mem_regions_single(int fault_type, size_t memsize);
static void map_mem_regions_multiple(int fault_type, size_t memsize);
static void map_mem_regions(int fault_type, int mapping_variant, size_t memsize);
static void unmap_mem_regions(int mapping_variant, size_t memsize);
static void setup_per_thread_regions(char *memblock, char *memblock_share, int fault_type, size_t memsize);
static void fault_pages(int thread_id);
static void execute_threads(void);
static void *thread_setup(void *arg);
static void run_test(int fault_type, int mapping_variant, size_t memsize);
static void setup_and_run_test(int fault_type, int threads);
static int get_ncpu(void);

/* Allocates memory using the default mmap behavior. Each VM region created is capped at 128 MB. */
static void
map_mem_regions_default(int fault_type, size_t memsize)
{
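    /* volatile sink: keeps the compiler from optimizing away the reads that
     * drive the faults. */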
    volatile char val;
    vm_prot_t curprot, maxprot;
    char *ptr, *memblock, *memblock_share = NULL;

    memblock = (char *)mmap(NULL, memsize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
    T_QUIET; T_ASSERT_NE((void *)memblock, MAP_FAILED, "mmap");

    if (fault_type == SOFT_FAULT) {
        /* Fault in all the pages of the original region. */
        for (ptr = memblock; ptr < memblock + memsize; ptr += pgsize) {
            val = *ptr;
        }
        /* Remap the region so that subsequent accesses result in read soft faults. */
        T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
            memsize, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
            &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
    }
    setup_per_thread_regions(memblock, memblock_share, fault_type, memsize);
}

/* Creates a single VM region by mapping in a named memory entry. */
static void
map_mem_regions_single(int fault_type, size_t memsize)
{
    volatile char val;
    vm_prot_t curprot, maxprot;
    char *ptr, *memblock = NULL, *memblock_share = NULL;
    vm_size_t size = memsize;
    vm_offset_t addr1 = 0;
    mach_port_t mem_handle = MACH_PORT_NULL;

    /* Allocate a region and fault in all the pages. */
    T_QUIET; T_ASSERT_MACH_SUCCESS(vm_allocate(mach_task_self(), &addr1, size, VM_FLAGS_ANYWHERE), "vm_allocate");
    for (ptr = (char *)addr1; ptr < (char *)addr1 + memsize; ptr += pgsize) {
        val = *ptr;
    }

    /* Create a named memory entry from the region allocated above, and de-allocate said region. */
    T_QUIET; T_ASSERT_MACH_SUCCESS(mach_make_memory_entry(mach_task_self(), &size, addr1, VM_PROT_ALL | MAP_MEM_NAMED_CREATE,
        &mem_handle, MACH_PORT_NULL), "mach_make_memory_entry");
    T_QUIET; T_ASSERT_MACH_SUCCESS(vm_deallocate(mach_task_self(), addr1, size), "vm_deallocate");

    /* Map in the named entry and deallocate it. */
    T_QUIET; T_ASSERT_MACH_SUCCESS(vm_map(mach_task_self(), (vm_address_t *)&memblock, size, 0, VM_FLAGS_ANYWHERE, mem_handle, 0,
        FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_NONE), "vm_map");
    T_QUIET; T_ASSERT_MACH_SUCCESS(mach_port_deallocate(mach_task_self(), mem_handle), "mach_port_deallocate");

    if (fault_type == SOFT_FAULT) {
        /* Fault in all the pages of the original region. */
        for (ptr = memblock; ptr < memblock + memsize; ptr += pgsize) {
            val = *ptr;
        }
        /* Remap the region so that subsequent accesses result in read soft faults. */
        T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
            memsize, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
            &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
    }
    setup_per_thread_regions(memblock, memblock_share, fault_type, memsize);
}

/* Allocates a separate VM region for each thread. */
static void
map_mem_regions_multiple(int fault_type, size_t memsize)
{
    int i;
    size_t region_len, num_pages;
    volatile char val;
    char *ptr, *memblock, *memblock_share;
    vm_prot_t curprot, maxprot;

    num_pages = memsize / pgsize;

    for (i = 0; i < num_threads; i++) {
        memblock = NULL;

        region_len = num_pages / (size_t)num_threads;
        if ((size_t)i < num_pages % (size_t)num_threads) {
            region_len++;
        }
        region_len *= pgsize;

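        /*
         * Darwin-specific: for MAP_ANON mappings, mmap() reads a VM tag out of
         * the fd argument when it is wrapped with VM_MAKE_TAG(). Alternating
         * the tag here plausibly also keeps adjacent anonymous mappings from
         * being coalesced into a single VM region, preserving the
         * one-region-per-thread layout.
         */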
        int fd = VM_MAKE_TAG((i % 2) ? VM_TAG1 : VM_TAG2);
        memblock = (char *)mmap(NULL, region_len, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, fd, 0);
        T_QUIET; T_ASSERT_NE((void *)memblock, MAP_FAILED, "mmap");
        memregion_config_per_thread[i].region_addr = memblock;
        memregion_config_per_thread[i].shared_region_addr = 0;
        memregion_config_per_thread[i].region_len = region_len;

        if (fault_type == SOFT_FAULT) {
            /* Fault in all the pages of the original region. */
            for (ptr = memblock; ptr < memblock + region_len; ptr += pgsize) {
                val = *ptr;
            }
            memblock_share = NULL;
            /* Remap the region so that subsequent accesses result in read soft faults. */
            T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
                region_len, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
                &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
            memregion_config_per_thread[i].shared_region_addr = memblock_share;
        }
    }
}

static void
map_mem_regions(int fault_type, int mapping_variant, size_t memsize)
{
    memregion_config_per_thread = (memregion_config *)malloc(sizeof(*memregion_config_per_thread) * (size_t)num_threads);
    switch (mapping_variant) {
    case VARIANT_SINGLE_REGION:
        map_mem_regions_single(fault_type, memsize);
        break;
    case VARIANT_MULTIPLE_REGIONS:
        map_mem_regions_multiple(fault_type, memsize);
        break;
    case VARIANT_DEFAULT:
    default:
        map_mem_regions_default(fault_type, memsize);
    }
}

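/*
 * Carves the block into one contiguous chunk per thread, handing the
 * remainder pages to the lowest-numbered threads. Worked example with
 * hypothetical numbers: num_pages = 10 and num_threads = 4 yields region
 * lengths {3, 3, 2, 2} at page offsets {0, 3, 6, 8}.
 */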
static void
setup_per_thread_regions(char *memblock, char *memblock_share, int fault_type, size_t memsize)
{
    int i;
    size_t region_len, region_start, num_pages;

    num_pages = memsize / pgsize;
    for (i = 0; i < num_threads; i++) {
        region_len = num_pages / (size_t)num_threads;
        region_start = region_len * (size_t)i;

        if ((size_t)i < num_pages % (size_t)num_threads) {
            region_start += (size_t)i;
            region_len++;
        } else {
            region_start += num_pages % (size_t)num_threads;
        }

        region_start *= pgsize;
        region_len *= pgsize;

        memregion_config_per_thread[i].region_addr = memblock + region_start;
        memregion_config_per_thread[i].shared_region_addr = ((fault_type == SOFT_FAULT) ?
            memblock_share + region_start : 0);
        memregion_config_per_thread[i].region_len = region_len;
    }
}

static void
unmap_mem_regions(int mapping_variant, size_t memsize)
{
    if (mapping_variant == VARIANT_MULTIPLE_REGIONS) {
        int i;
        for (i = 0; i < num_threads; i++) {
            if (memregion_config_per_thread[i].shared_region_addr != 0) {
                T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[i].shared_region_addr,
                    memregion_config_per_thread[i].region_len), "munmap");
            }
            T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[i].region_addr,
                memregion_config_per_thread[i].region_len), "munmap");
        }
    } else {
        if (memregion_config_per_thread[0].shared_region_addr != 0) {
            T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[0].shared_region_addr, memsize), "munmap");
        }
        T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[0].region_addr, memsize), "munmap");
    }
    /* map_mem_regions() allocates this on every iteration; release it here. */
    free(memregion_config_per_thread);
}

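/*
 * The benchmark's hot loop: one read per page. Depending on setup, each read
 * takes either a soft fault (the page is already resident via the
 * vm_remap()ed mapping) or a zero-fill fault (a fresh anonymous page).
 */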
static void
fault_pages(int thread_id)
{
    char *ptr, *block;
    volatile char val;

    block = memregion_config_per_thread[thread_id].shared_region_addr ?
        memregion_config_per_thread[thread_id].shared_region_addr :
        memregion_config_per_thread[thread_id].region_addr;
    for (ptr = block; ptr < block + memregion_config_per_thread[thread_id].region_len; ptr += pgsize) {
        val = *ptr;
    }
}

static void *
thread_setup(void *arg)
{
    int my_index = *((int *)arg);

    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&ready_thread_count_lock), "pthread_mutex_lock");
    ready_thread_count++;
    if (ready_thread_count == num_threads) {
        T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_signal(&threads_ready_cvar), "pthread_cond_signal");
    }
    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&start_cvar, &ready_thread_count_lock), "pthread_cond_wait");
    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&ready_thread_count_lock), "pthread_mutex_unlock");

    fault_pages(my_index);

    /* Up the finished count */
    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&finished_thread_count_lock), "pthread_mutex_lock");
    finished_thread_count++;
    if (finished_thread_count == num_threads) {
        /* All the threads are done. Wake up the main thread */
        T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_signal(&threads_finished_cvar), "pthread_cond_signal");
    }
    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&finished_thread_count_lock), "pthread_mutex_unlock");
    return NULL;
}

static void
execute_threads(void)
{
    int thread_index, thread_retval;
    int *thread_indices;
    void *thread_retval_ptr = &thread_retval;
    pthread_t *threads;

    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&threads_ready_cvar, NULL), "pthread_cond_init");
    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&start_cvar, NULL), "pthread_cond_init");
    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_init(&ready_thread_count_lock, NULL), "pthread_mutex_init");
    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&threads_finished_cvar, NULL), "pthread_cond_init");
    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_init(&finished_thread_count_lock, NULL), "pthread_mutex_init");
    ready_thread_count = 0;
    finished_thread_count = 0;

    threads = (pthread_t *)malloc(sizeof(*threads) * (size_t)num_threads);
    thread_indices = (int *)malloc(sizeof(*thread_indices) * (size_t)num_threads);
    for (thread_index = 0; thread_index < num_threads; thread_index++) {
        thread_indices[thread_index] = thread_index;
        T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_create(&threads[thread_index], NULL,
            thread_setup, (void *)&thread_indices[thread_index]), "pthread_create");
    }

    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&ready_thread_count_lock), "pthread_mutex_lock");
    while (ready_thread_count != num_threads) {
        T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&threads_ready_cvar, &ready_thread_count_lock),
            "pthread_cond_wait");
    }
    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&ready_thread_count_lock), "pthread_mutex_unlock");

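    /*
     * The measured interval runs from waking the gated threads to the last
     * thread signalling completion, i.e. the wall-clock time (including
     * wakeup latency) for all threads to fault their regions.
     */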
    T_STAT_MEASURE(runtime) {
        /* Ungate the threads */
        T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_broadcast(&start_cvar), "pthread_cond_broadcast");
        /* Wait for the threads to finish */
        T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&finished_thread_count_lock), "pthread_mutex_lock");
        while (finished_thread_count != num_threads) {
            T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&threads_finished_cvar, &finished_thread_count_lock), "pthread_cond_wait");
        }
    }
    /* The wait above returns with the lock held; release it before joining. */
    T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&finished_thread_count_lock), "pthread_mutex_unlock");

    /* Join the threads */
    for (thread_index = 0; thread_index < num_threads; thread_index++) {
        T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_join(threads[thread_index], &thread_retval_ptr),
            "pthread_join");
    }

    free(threads);
    free(thread_indices);
}

static void
run_test(int fault_type, int mapping_variant, size_t memsize)
{
    char metric_str[32];
    size_t num_pages;
    size_t sysctl_size = sizeof(pgsize);
    int ret = sysctlbyname("vm.pagesize", &pgsize, &sysctl_size, NULL, 0);
    T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl vm.pagesize failed");

    num_pages = memsize / pgsize;

    T_QUIET; T_ASSERT_LT(fault_type, NUM_FAULT_TYPES, "invalid test type");
    T_QUIET; T_ASSERT_LT(mapping_variant, NUM_MAPPING_VARIANTS, "invalid mapping variant");
    T_QUIET; T_ASSERT_GT(num_threads, 0, "num_threads <= 0");
    T_QUIET; T_ASSERT_GT((int)num_pages / num_threads, 0, "num_pages/num_threads <= 0");

    T_LOG("No. of cpus: %d", get_ncpu());
    T_LOG("No. of threads: %d", num_threads);
    T_LOG("No. of pages: %zu", num_pages);
    T_LOG("Pagesize: %zu", pgsize);
    T_LOG("Allocation size: %zu MB", memsize / (1024 * 1024));
    T_LOG("Mapping variant: %s", variant_str[mapping_variant]);

    snprintf(metric_str, sizeof(metric_str), "Runtime-%s", variant_str[mapping_variant]);
    runtime = dt_stat_time_create(metric_str);

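    /*
     * Recreate the mappings on every iteration so that each sample measures
     * fresh faults; dt_stat_stable() keeps sampling until the runtime
     * estimate converges.
     */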
    while (!dt_stat_stable(runtime)) {
        map_mem_regions(fault_type, mapping_variant, memsize);
        execute_threads();
        unmap_mem_regions(mapping_variant, memsize);
    }

    dt_stat_finalize(runtime);
    T_LOG("Throughput-%s (MB/s): %lf\n\n", variant_str[mapping_variant], (double)memsize / (1024 * 1024) / dt_stat_mean((dt_stat_t)runtime));
}

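/*
 * Environment overrides (names taken from the code below):
 *   NTHREADS  - thread count; when set, the single-threaded runs are skipped
 *   MEMSIZEMB - total allocation size in MB
 *   VARIANT   - mapping variant (1 = default, 2 = single region,
 *               3 = multiple regions); when unset, all variants run
 *
 * Hypothetical invocation (the exact runner and flags depend on your
 * darwintest build):
 *   NTHREADS=8 MEMSIZEMB=256 VARIANT=3 ./perf_vmfault
 */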
static void
setup_and_run_test(int fault_type, int threads)
{
    int i, mapping_variant;
    size_t memsize;
    char *e;

    mapping_variant = VARIANT_DEFAULT;
    memsize = MEMSIZE;
    num_threads = threads;

    if ((e = getenv("NTHREADS"))) {
        if (threads == 1) {
            T_SKIP("Custom environment variables specified. Skipping single threaded version.");
        }
        num_threads = (int)strtol(e, NULL, 0);
    }

    if ((e = getenv("MEMSIZEMB"))) {
        memsize = (size_t)strtol(e, NULL, 0) * 1024 * 1024;
    }

    if ((e = getenv("VARIANT"))) {
        mapping_variant = (int)strtol(e, NULL, 0);
        run_test(fault_type, mapping_variant, memsize);
    } else {
        for (i = VARIANT_DEFAULT; i < NUM_MAPPING_VARIANTS; i++) {
            run_test(fault_type, i, memsize);
        }
    }

    T_END;
}

static int
get_ncpu(void)
{
    int ncpu;
    size_t length = sizeof(ncpu);

    T_QUIET; T_ASSERT_POSIX_SUCCESS(sysctlbyname("hw.ncpu", &ncpu, &length, NULL, 0),
        "failed to query hw.ncpu");
    return ncpu;
}

T_DECL(read_soft_fault,
    "Read soft faults (single thread)")
{
    setup_and_run_test(SOFT_FAULT, 1);
}

T_DECL(read_soft_fault_multithreaded,
    "Read soft faults (multi-threaded)")
{
    char *e;
    int nthreads;

    /* iOSMark passes in the no. of threads via an env. variable */
    if ((e = getenv("DT_STAT_NTHREADS"))) {
        nthreads = (int)strtol(e, NULL, 0);
    } else {
        nthreads = get_ncpu();
        if (nthreads == 1) {
            T_SKIP("Skipping multi-threaded test on single core device.");
        }
    }
    setup_and_run_test(SOFT_FAULT, nthreads);
}

T_DECL(zero_fill_fault,
    "Zero fill faults (single thread)")
{
    setup_and_run_test(ZERO_FILL, 1);
}

T_DECL(zero_fill_fault_multithreaded,
    "Zero fill faults (multi-threaded)")
{
    char *e;
    int nthreads;

    /* iOSMark passes in the no. of threads via an env. variable */
    if ((e = getenv("DT_STAT_NTHREADS"))) {
        nthreads = (int)strtol(e, NULL, 0);
    } else {
        nthreads = get_ncpu();
        if (nthreads == 1) {
            T_SKIP("Skipping multi-threaded test on single core device.");
        }
    }
    setup_and_run_test(ZERO_FILL, nthreads);
}