#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <mach/mach.h>
#include <mach/vm_map.h>
#include <darwintest.h>
#include <TargetConditionals.h>
#include <perfcheck_keys.h>

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.vm.perf"),
	T_META_CHECK_LEAKS(false),
	T_META_TAG_PERF
	);

#ifdef DT_IOSMARK
#define MEMSIZE         (1UL << 29)     /* 512 MB */
#else
#define MEMSIZE         (1UL << 27)     /* 128 MB */
#endif

#define VM_TAG1         100
#define VM_TAG2         101

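/*
 * Fault types exercised by this test. ZERO_FILL touches freshly mapped
 * anonymous memory, so every access takes a zero-fill fault. SOFT_FAULT
 * pre-faults the pages and then reads them through a vm_remap()'d alias,
 * so each access takes a cheaper read soft fault on an already-resident
 * page.
 */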
enum {
	SOFT_FAULT,
	ZERO_FILL,
	NUM_FAULT_TYPES
};

enum {
	VARIANT_DEFAULT = 1,
	VARIANT_SINGLE_REGION,
	VARIANT_MULTIPLE_REGIONS,
	NUM_MAPPING_VARIANTS
};

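/*
 * Indexed directly by the mapping-variant enum above; since VARIANT_DEFAULT
 * is 1, slot 0 holds a "none" placeholder.
 */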
static const char *variant_str[] = {
	"none",
	"default",
	"single-region",
	"multiple-regions"
};

typedef struct {
	char *region_addr;
	char *shared_region_addr;
	size_t region_len;
} memregion_config;

static memregion_config *memregion_config_per_thread;

static size_t pgsize;
static int num_threads;
static int ready_thread_count;
static dt_stat_time_t runtime;
static pthread_cond_t start_cvar;
static pthread_cond_t threads_ready_cvar;
static pthread_mutex_t ready_thread_count_lock;

static void map_mem_regions_default(int fault_type, size_t memsize);
static void map_mem_regions_single(int fault_type, size_t memsize);
static void map_mem_regions_multiple(int fault_type, size_t memsize);
static void map_mem_regions(int fault_type, int mapping_variant, size_t memsize);
static void unmap_mem_regions(int mapping_variant, size_t memsize);
static void setup_per_thread_regions(char *memblock, char *memblock_share, int fault_type, size_t memsize);
static void fault_pages(int thread_id);
static void execute_threads(void);
static void *thread_setup(void *arg);
static void run_test(int fault_type, int mapping_variant, size_t memsize);
static void setup_and_run_test(int fault_type, int threads);
static int get_ncpu(void);

/* Allocates memory using the default mmap behavior. Each VM region created is capped at 128 MB. */
static void
map_mem_regions_default(int fault_type, size_t memsize)
{
	volatile char val;
	vm_prot_t curprot, maxprot;
	char *ptr, *memblock, *memblock_share = NULL;

	memblock = (char *)mmap(NULL, memsize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
	T_QUIET; T_ASSERT_NE((void *)memblock, MAP_FAILED, "mmap");

	if (fault_type == SOFT_FAULT) {
		/* Fault in all the pages of the original region. */
		for (ptr = memblock; ptr < memblock + memsize; ptr += pgsize) {
			val = *ptr;
		}
		/* Remap the region so that subsequent accesses result in read soft faults. */
		T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
		    memsize, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
		    &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
	}
	setup_per_thread_regions(memblock, memblock_share, fault_type, memsize);
}

/* Creates a single VM region by mapping in a named memory entry. */
static void
map_mem_regions_single(int fault_type, size_t memsize)
{
	volatile char val;
	vm_prot_t curprot, maxprot;
	char *ptr, *memblock = NULL, *memblock_share = NULL;
	vm_size_t size = memsize;
	vm_offset_t addr1 = 0;
	mach_port_t mem_handle = MACH_PORT_NULL;

	/* Allocate a region and fault in all the pages. */
	T_QUIET; T_ASSERT_MACH_SUCCESS(vm_allocate(mach_task_self(), &addr1, size, VM_FLAGS_ANYWHERE), "vm_allocate");
	for (ptr = (char *)addr1; ptr < (char *)addr1 + memsize; ptr += pgsize) {
		val = *ptr;
	}

	/* Create a named memory entry from the region allocated above, and de-allocate said region. */
	T_QUIET; T_ASSERT_MACH_SUCCESS(mach_make_memory_entry(mach_task_self(), &size, addr1, VM_PROT_ALL | MAP_MEM_NAMED_CREATE,
	    &mem_handle, MACH_PORT_NULL), "mach_make_memory_entry");
	T_QUIET; T_ASSERT_MACH_SUCCESS(vm_deallocate(mach_task_self(), addr1, size), "vm_deallocate");

	/* Map in the named entry, then drop our reference to the port. */
	T_QUIET; T_ASSERT_MACH_SUCCESS(vm_map(mach_task_self(), (vm_address_t *)&memblock, size, 0, VM_FLAGS_ANYWHERE, mem_handle, 0,
	    FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_NONE), "vm_map");
	T_QUIET; T_ASSERT_MACH_SUCCESS(mach_port_deallocate(mach_task_self(), mem_handle), "mach_port_deallocate");

	if (fault_type == SOFT_FAULT) {
		/* Fault in all the pages of the original region. */
		for (ptr = memblock; ptr < memblock + memsize; ptr += pgsize) {
			val = *ptr;
		}
		/* Remap the region so that subsequent accesses result in read soft faults. */
		T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
		    memsize, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
		    &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
	}
	setup_per_thread_regions(memblock, memblock_share, fault_type, memsize);
}

/* Allocates a separate VM region for each thread. */
static void
map_mem_regions_multiple(int fault_type, size_t memsize)
{
	int i;
	size_t region_len, num_pages;
	volatile char val;
	char *ptr, *memblock, *memblock_share;
	vm_prot_t curprot, maxprot;

	num_pages = memsize / pgsize;

	for (i = 0; i < num_threads; i++) {
		memblock = NULL;

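		/*
		 * Distribute pages evenly: every thread gets
		 * num_pages / num_threads pages, and the first
		 * (num_pages % num_threads) threads take one extra page each
		 * so the remainder is covered.
		 */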
		region_len = num_pages / (size_t)num_threads;
		if ((size_t)i < num_pages % (size_t)num_threads) {
			region_len++;
		}
		region_len *= pgsize;

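		/*
		 * Alternate VM tags between neighboring mappings. The intent
		 * here (an assumption, not spelled out in the original) is
		 * that differing tags keep the kernel from coalescing adjacent
		 * anonymous mappings, so each thread really gets its own
		 * VM region.
		 */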
		int flags = VM_MAKE_TAG((i % 2) ? VM_TAG1 : VM_TAG2) | MAP_ANON | MAP_PRIVATE;

		memblock = (char *)mmap(NULL, region_len, PROT_READ | PROT_WRITE, flags, -1, 0);
		T_QUIET; T_ASSERT_NE((void *)memblock, MAP_FAILED, "mmap");
		memregion_config_per_thread[i].region_addr = memblock;
		memregion_config_per_thread[i].shared_region_addr = NULL;
		memregion_config_per_thread[i].region_len = region_len;

		if (fault_type == SOFT_FAULT) {
			/* Fault in all the pages of the original region. */
			for (ptr = memblock; ptr < memblock + region_len; ptr += pgsize) {
				val = *ptr;
			}
			memblock_share = NULL;
			/* Remap the region so that subsequent accesses result in read soft faults. */
			T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
			    region_len, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
			    &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
			memregion_config_per_thread[i].shared_region_addr = memblock_share;
		}
	}
}

static void
map_mem_regions(int fault_type, int mapping_variant, size_t memsize)
{
	memregion_config_per_thread = (memregion_config *)malloc(sizeof(*memregion_config_per_thread) * (size_t)num_threads);
	T_QUIET; T_ASSERT_NOTNULL(memregion_config_per_thread, "malloc");
	switch (mapping_variant) {
	case VARIANT_SINGLE_REGION:
		map_mem_regions_single(fault_type, memsize);
		break;
	case VARIANT_MULTIPLE_REGIONS:
		map_mem_regions_multiple(fault_type, memsize);
		break;
	case VARIANT_DEFAULT:
	default:
		map_mem_regions_default(fault_type, memsize);
	}
}

static void
setup_per_thread_regions(char *memblock, char *memblock_share, int fault_type, size_t memsize)
{
	int i;
	size_t region_len, region_start, num_pages;

	num_pages = memsize / pgsize;
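	/*
	 * Carve the single block into per-thread slices using the same
	 * even-split-plus-remainder scheme as above. Worked example: with
	 * num_pages = 10 and num_threads = 4, threads get slices of
	 * 3, 3, 2, 2 pages starting at page offsets 0, 3, 6, 8.
	 */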
	for (i = 0; i < num_threads; i++) {
		region_len = num_pages / (size_t)num_threads;
		region_start = region_len * (size_t)i;

		if ((size_t)i < num_pages % (size_t)num_threads) {
			region_start += (size_t)i;
			region_len++;
		} else {
			region_start += num_pages % (size_t)num_threads;
		}

		region_start *= pgsize;
		region_len *= pgsize;

		memregion_config_per_thread[i].region_addr = memblock + region_start;
		memregion_config_per_thread[i].shared_region_addr = ((fault_type == SOFT_FAULT) ?
		    memblock_share + region_start : NULL);
		memregion_config_per_thread[i].region_len = region_len;
	}
}

static void
unmap_mem_regions(int mapping_variant, size_t memsize)
{
	if (mapping_variant == VARIANT_MULTIPLE_REGIONS) {
		int i;
		for (i = 0; i < num_threads; i++) {
			if (memregion_config_per_thread[i].shared_region_addr != 0) {
				T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[i].shared_region_addr,
				    memregion_config_per_thread[i].region_len), "munmap");
			}
			T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[i].region_addr,
			    memregion_config_per_thread[i].region_len), "munmap");
		}
	} else {
		if (memregion_config_per_thread[0].shared_region_addr != 0) {
			T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[0].shared_region_addr, memsize), "munmap");
		}
		T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[0].region_addr, memsize), "munmap");
	}
	/* Release the per-thread bookkeeping allocated in map_mem_regions(). */
	free(memregion_config_per_thread);
}

static void
fault_pages(int thread_id)
{
	char *ptr, *block;
	volatile char val;

	block = memregion_config_per_thread[thread_id].shared_region_addr ?
	    memregion_config_per_thread[thread_id].shared_region_addr :
	    memregion_config_per_thread[thread_id].region_addr;
	for (ptr = block; ptr < block + memregion_config_per_thread[thread_id].region_len; ptr += pgsize) {
		val = *ptr;
	}
}

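/*
 * Rendezvous protocol: each worker bumps ready_thread_count under the lock;
 * the last one to arrive signals threads_ready_cvar, and every worker then
 * blocks on start_cvar until the main thread broadcasts the start. Note the
 * bare pthread_cond_wait() on start_cvar has no predicate, so this assumes
 * no spurious wakeups.
 */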
static void *
thread_setup(void *arg)
{
	int my_index = *((int *)arg);

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&ready_thread_count_lock), "pthread_mutex_lock");
	ready_thread_count++;
	if (ready_thread_count == num_threads) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_signal(&threads_ready_cvar), "pthread_cond_signal");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&start_cvar, &ready_thread_count_lock), "pthread_cond_wait");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&ready_thread_count_lock), "pthread_mutex_unlock");

	fault_pages(my_index);
	return NULL;
}

static void
execute_threads(void)
{
	int thread_index, thread_retval;
	int *thread_indices;
	void *thread_retval_ptr = &thread_retval;
	pthread_t *threads;

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&threads_ready_cvar, NULL), "pthread_cond_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&start_cvar, NULL), "pthread_cond_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_init(&ready_thread_count_lock, NULL), "pthread_mutex_init");
	ready_thread_count = 0;

	threads = (pthread_t *)malloc(sizeof(*threads) * (size_t)num_threads);
	thread_indices = (int *)malloc(sizeof(*thread_indices) * (size_t)num_threads);
	T_QUIET; T_ASSERT_NOTNULL(threads, "malloc");
	T_QUIET; T_ASSERT_NOTNULL(thread_indices, "malloc");
	for (thread_index = 0; thread_index < num_threads; thread_index++) {
		thread_indices[thread_index] = thread_index;
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_create(&threads[thread_index], NULL,
		    thread_setup, (void *)&thread_indices[thread_index]), "pthread_create");
	}

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&ready_thread_count_lock), "pthread_mutex_lock");
	/* Re-check the predicate in a loop to tolerate spurious wakeups. */
	while (ready_thread_count != num_threads) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&threads_ready_cvar, &ready_thread_count_lock),
		    "pthread_cond_wait");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&ready_thread_count_lock), "pthread_mutex_unlock");

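	/*
	 * The measured interval spans the start broadcast through the last
	 * join, i.e. the wall-clock time for every thread to fault in its
	 * slice (plus thread wakeup/teardown overhead, which is assumed to
	 * be small relative to the fault loop).
	 */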
	T_STAT_MEASURE(runtime) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_broadcast(&start_cvar), "pthread_cond_broadcast");
		for (thread_index = 0; thread_index < num_threads; thread_index++) {
			T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_join(threads[thread_index], &thread_retval_ptr),
			    "pthread_join");
		}
	};

	free(threads);
	free(thread_indices);
}

static void
run_test(int fault_type, int mapping_variant, size_t memsize)
{
	char metric_str[32];
	size_t num_pages;
	size_t sysctl_size = sizeof(pgsize);
	int ret = sysctlbyname("vm.pagesize", &pgsize, &sysctl_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl vm.pagesize");

	num_pages = memsize / pgsize;

	T_QUIET; T_ASSERT_LT(fault_type, NUM_FAULT_TYPES, "invalid test type");
	T_QUIET; T_ASSERT_LT(mapping_variant, NUM_MAPPING_VARIANTS, "invalid mapping variant");
	T_QUIET; T_ASSERT_GT(num_threads, 0, "num_threads <= 0");
	T_QUIET; T_ASSERT_GT((int)num_pages / num_threads, 0, "num_pages/num_threads <= 0");

	T_LOG("No. of cpus: %d", get_ncpu());
	T_LOG("No. of threads: %d", num_threads);
	T_LOG("No. of pages: %zu", num_pages);
	T_LOG("Pagesize: %zu", pgsize);
	T_LOG("Allocation size: %zu MB", memsize / (1024 * 1024));
	T_LOG("Mapping variant: %s", variant_str[mapping_variant]);

	snprintf(metric_str, sizeof(metric_str), "Runtime-%s", variant_str[mapping_variant]);
	runtime = dt_stat_time_create(metric_str);

	/* This sets the A/B failure threshold at 50% of baseline for Runtime. */
	dt_stat_set_variable((dt_stat_t)runtime, kPCFailureThresholdPctVar, 50.0);
	while (!dt_stat_stable(runtime)) {
		map_mem_regions(fault_type, mapping_variant, memsize);
		execute_threads();
		unmap_mem_regions(mapping_variant, memsize);
	}

	dt_stat_finalize(runtime);
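	/*
	 * Report throughput as (memsize in MB) / mean runtime; this assumes
	 * dt_stat_mean() on a dt_stat_time_t yields seconds.
	 */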
356 T_LOG("Throughput-%s (MB/s): %lf\n\n", variant_str[mapping_variant], (double)memsize / (1024 * 1024) / dt_stat_mean((dt_stat_t)runtime));
357 }
358
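/*
 * Environment overrides honored below (hypothetical invocation; the binary
 * name depends on how the darwintest bundle is built):
 *
 *   NTHREADS=8 MEMSIZEMB=256 VARIANT=2 ./perf_vmfault
 *
 * NTHREADS replaces the thread count (and skips the single-threaded decls),
 * MEMSIZEMB overrides the allocation size in MB, and VARIANT pins a single
 * mapping variant instead of sweeping all of them.
 */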
static void
setup_and_run_test(int fault_type, int threads)
{
	int i, mapping_variant;
	size_t memsize;
	char *e;

	mapping_variant = VARIANT_DEFAULT;
	memsize = MEMSIZE;
	num_threads = threads;

	if ((e = getenv("NTHREADS"))) {
		if (threads == 1) {
			T_SKIP("Custom environment variables specified. Skipping single threaded version.");
		}
		num_threads = (int)strtol(e, NULL, 0);
	}

	if ((e = getenv("MEMSIZEMB"))) {
		memsize = (size_t)strtol(e, NULL, 0) * 1024 * 1024;
	}

	if ((e = getenv("VARIANT"))) {
		mapping_variant = (int)strtol(e, NULL, 0);
		run_test(fault_type, mapping_variant, memsize);
	} else {
		for (i = VARIANT_DEFAULT; i < NUM_MAPPING_VARIANTS; i++) {
			run_test(fault_type, i, memsize);
		}
	}

	T_END;
}

static int
get_ncpu(void)
{
	int ncpu;
	size_t length = sizeof(ncpu);

	T_QUIET; T_ASSERT_POSIX_SUCCESS(sysctlbyname("hw.ncpu", &ncpu, &length, NULL, 0),
	    "failed to query hw.ncpu");
	return ncpu;
}

T_DECL(read_soft_fault,
    "Read soft faults (single thread)")
{
	setup_and_run_test(SOFT_FAULT, 1);
}

T_DECL(read_soft_fault_multithreaded,
    "Read soft faults (multi-threaded)")
{
	char *e;
	int nthreads;

	/* iOSMark passes in the no. of threads via an env. variable */
	if ((e = getenv("DT_STAT_NTHREADS"))) {
		nthreads = (int)strtol(e, NULL, 0);
	} else {
		nthreads = get_ncpu();
	}
	setup_and_run_test(SOFT_FAULT, nthreads);
}

T_DECL(zero_fill_fault,
    "Zero fill faults (single thread)")
{
	setup_and_run_test(ZERO_FILL, 1);
}

T_DECL(zero_fill_fault_multithreaded,
    "Zero fill faults (multi-threaded)")
{
	char *e;
	int nthreads;

	/* iOSMark passes in the no. of threads via an env. variable */
	if ((e = getenv("DT_STAT_NTHREADS"))) {
		nthreads = (int)strtol(e, NULL, 0);
	} else {
		nthreads = get_ncpu();
	}
	setup_and_run_test(ZERO_FILL, nthreads);
}