#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <mach/mach.h>
#include <mach/vm_map.h>
#include <darwintest.h>
#include <TargetConditionals.h>
#include <perfcheck_keys.h>

#include "benchmark/helpers.h"

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.vm.perf"),
	T_META_CHECK_LEAKS(false),
	T_META_TAG_PERF
	);

#ifdef DT_IOSMARK
#define MEMSIZE (1UL<<29) /* 512 MB */
#else
#define MEMSIZE (1UL<<27) /* 128 MB */
#endif

#define VM_TAG1 100
#define VM_TAG2 101

enum {
	SOFT_FAULT,
	ZERO_FILL,
	NUM_FAULT_TYPES
};

enum {
	VARIANT_DEFAULT = 1,
	VARIANT_SINGLE_REGION,
	VARIANT_MULTIPLE_REGIONS,
	NUM_MAPPING_VARIANTS
};

static char *variant_str[] = {
	"none",
	"default",
	"single-region",
	"multiple-regions"
};

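/* Describes one thread's slice of the mapping: the directly mapped pages, the
 * remapped alias used for the soft-fault runs (0 otherwise), and the slice
 * length in bytes. */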
typedef struct {
	char *region_addr;
	char *shared_region_addr;
	size_t region_len;
} memregion_config;

static memregion_config *memregion_config_per_thread;

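/* Shared test state: the page size and thread count for the current run, plus
 * the counters and condition variables that let the main thread release all
 * workers at once and then wait for them to drain. */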
static size_t pgsize;
static int num_threads;
static int ready_thread_count;
static int finished_thread_count;
static dt_stat_time_t runtime;
static pthread_cond_t start_cvar;
static pthread_cond_t threads_ready_cvar;
static pthread_cond_t threads_finished_cvar;
static pthread_mutex_t ready_thread_count_lock;
static pthread_mutex_t finished_thread_count_lock;

static void map_mem_regions_default(int fault_type, size_t memsize);
static void map_mem_regions_single(int fault_type, size_t memsize);
static void map_mem_regions_multiple(int fault_type, size_t memsize);
static void map_mem_regions(int fault_type, int mapping_variant, size_t memsize);
static void unmap_mem_regions(int mapping_variant, size_t memsize);
static void setup_per_thread_regions(char *memblock, char *memblock_share, int fault_type, size_t memsize);
static void fault_pages(int thread_id);
static void execute_threads(void);
static void *thread_setup(void *arg);
static void run_test(int fault_type, int mapping_variant, size_t memsize);
static void setup_and_run_test(int fault_type, int threads);

/* Allocates memory using the default mmap behavior. Each VM region created is capped at 128 MB. */
static void
map_mem_regions_default(int fault_type, size_t memsize)
{
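	/* 'val' is a volatile sink: each page is read through it so the
	 * compiler cannot optimize the faulting loads away. */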
	volatile char val;
	vm_prot_t curprot, maxprot;
	char *ptr, *memblock, *memblock_share = NULL;

	memblock = (char *)mmap(NULL, memsize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
	T_QUIET; T_ASSERT_NE((void *)memblock, MAP_FAILED, "mmap");

	if (fault_type == SOFT_FAULT) {
		/* Fault in all the pages of the original region. */
		for (ptr = memblock; ptr < memblock + memsize; ptr += pgsize) {
			val = *ptr;
		}
		/* Remap the region so that subsequent accesses result in read soft faults. */
		T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
		    memsize, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
		    &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
	}
	setup_per_thread_regions(memblock, memblock_share, fault_type, memsize);
}

/* Creates a single VM region by mapping in a named memory entry. */
static void
map_mem_regions_single(int fault_type, size_t memsize)
{
	volatile char val;
	vm_prot_t curprot, maxprot;
	char *ptr, *memblock = NULL, *memblock_share = NULL;
	vm_size_t size = memsize;
	vm_offset_t addr1 = 0;
	mach_port_t mem_handle = MACH_PORT_NULL;

	/* Allocate a region and fault in all the pages. */
	T_QUIET; T_ASSERT_MACH_SUCCESS(vm_allocate(mach_task_self(), &addr1, size, VM_FLAGS_ANYWHERE), "vm_allocate");
	for (ptr = (char *)addr1; ptr < (char *)addr1 + memsize; ptr += pgsize) {
		val = *ptr;
	}

	/* Create a named memory entry from the region allocated above, and de-allocate said region. */
	T_QUIET; T_ASSERT_MACH_SUCCESS(mach_make_memory_entry(mach_task_self(), &size, addr1, VM_PROT_ALL | MAP_MEM_NAMED_CREATE,
	    &mem_handle, MACH_PORT_NULL), "mach_make_memory_entry");
	T_QUIET; T_ASSERT_MACH_SUCCESS(vm_deallocate(mach_task_self(), addr1, size), "vm_deallocate");

	/* Map in the named entry, then release our send right on it; the mapping keeps the memory alive. */
	T_QUIET; T_ASSERT_MACH_SUCCESS(vm_map(mach_task_self(), (vm_address_t *)&memblock, size, 0, VM_FLAGS_ANYWHERE, mem_handle, 0,
	    FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_NONE), "vm_map");
	T_QUIET; T_ASSERT_MACH_SUCCESS(mach_port_deallocate(mach_task_self(), mem_handle), "mach_port_deallocate");

	if (fault_type == SOFT_FAULT) {
		/* Fault in all the pages of the original region. */
		for (ptr = memblock; ptr < memblock + memsize; ptr += pgsize) {
			val = *ptr;
		}
		/* Remap the region so that subsequent accesses result in read soft faults. */
		T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
		    memsize, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
		    &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
	}
	setup_per_thread_regions(memblock, memblock_share, fault_type, memsize);
}

/* Allocates a separate VM region for each thread. */
static void
map_mem_regions_multiple(int fault_type, size_t memsize)
{
	int i;
	size_t region_len, num_pages;
	volatile char val;
	char *ptr, *memblock, *memblock_share;
	vm_prot_t curprot, maxprot;

	num_pages = memsize / pgsize;

	for (i = 0; i < num_threads; i++) {
		memblock = NULL;

		region_len = num_pages / (size_t)num_threads;
		if ((size_t)i < num_pages % (size_t)num_threads) {
			region_len++;
		}
		region_len *= pgsize;

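		/* With MAP_ANON the fd argument carries the allocation tag, so
		 * VM_MAKE_TAG labels alternating regions with VM_TAG1/VM_TAG2
		 * rather than naming a file descriptor. */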
		int fd = VM_MAKE_TAG((i % 2) ? VM_TAG1 : VM_TAG2);
		memblock = (char *)mmap(NULL, region_len, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, fd, 0);
		T_QUIET; T_ASSERT_NE((void *)memblock, MAP_FAILED, "mmap");
		memregion_config_per_thread[i].region_addr = memblock;
		memregion_config_per_thread[i].shared_region_addr = 0;
		memregion_config_per_thread[i].region_len = region_len;

		if (fault_type == SOFT_FAULT) {
			/* Fault in all the pages of the original region. */
			for (ptr = memblock; ptr < memblock + region_len; ptr += pgsize) {
				val = *ptr;
			}
			memblock_share = NULL;
			/* Remap the region so that subsequent accesses result in read soft faults. */
			T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
			    region_len, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
			    &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
			memregion_config_per_thread[i].shared_region_addr = memblock_share;
		}
	}
}

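/* Allocates the per-thread bookkeeping table and dispatches to the mapping
 * strategy selected by 'mapping_variant'. */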
static void
map_mem_regions(int fault_type, int mapping_variant, size_t memsize)
{
	memregion_config_per_thread = (memregion_config *)malloc(sizeof(*memregion_config_per_thread) * (size_t)num_threads);
	switch (mapping_variant) {
	case VARIANT_SINGLE_REGION:
		map_mem_regions_single(fault_type, memsize);
		break;
	case VARIANT_MULTIPLE_REGIONS:
		map_mem_regions_multiple(fault_type, memsize);
		break;
	case VARIANT_DEFAULT:
	default:
		map_mem_regions_default(fault_type, memsize);
	}
}

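/* Splits a single mapped block into contiguous per-thread slices. Each thread
 * gets num_pages / num_threads pages; the first num_pages % num_threads
 * threads each absorb one extra page. */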
static void
setup_per_thread_regions(char *memblock, char *memblock_share, int fault_type, size_t memsize)
{
	int i;
	size_t region_len, region_start, num_pages;

	num_pages = memsize / pgsize;
	for (i = 0; i < num_threads; i++) {
		region_len = num_pages / (size_t)num_threads;
		region_start = region_len * (size_t)i;

		if ((size_t)i < num_pages % (size_t)num_threads) {
			region_start += (size_t)i;
			region_len++;
		} else {
			region_start += num_pages % (size_t)num_threads;
		}

		region_start *= pgsize;
		region_len *= pgsize;

		memregion_config_per_thread[i].region_addr = memblock + region_start;
		memregion_config_per_thread[i].shared_region_addr = ((fault_type == SOFT_FAULT) ?
		    memblock_share + region_start : 0);
		memregion_config_per_thread[i].region_len = region_len;
	}
}

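/* Unmaps everything map_mem_regions() created for this variant and releases
 * the per-thread bookkeeping table. */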
static void
unmap_mem_regions(int mapping_variant, size_t memsize)
{
	if (mapping_variant == VARIANT_MULTIPLE_REGIONS) {
		int i;
		for (i = 0; i < num_threads; i++) {
			if (memregion_config_per_thread[i].shared_region_addr != 0) {
				T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[i].shared_region_addr,
				    memregion_config_per_thread[i].region_len), "munmap");
			}
			T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[i].region_addr,
			    memregion_config_per_thread[i].region_len), "munmap");
		}
	} else {
		if (memregion_config_per_thread[0].shared_region_addr != 0) {
			T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[0].shared_region_addr, memsize), "munmap");
		}
		T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[0].region_addr, memsize), "munmap");
	}
	/* map_mem_regions() allocates a fresh table on every iteration, so free it here. */
	free(memregion_config_per_thread);
}

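/* Touches every page in the calling thread's slice. Soft-fault runs read
 * through the remapped alias; zero-fill runs read the untouched original
 * mapping. */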
static void
fault_pages(int thread_id)
{
	char *ptr, *block;
	volatile char val;

	block = memregion_config_per_thread[thread_id].shared_region_addr ?
	    memregion_config_per_thread[thread_id].shared_region_addr :
	    memregion_config_per_thread[thread_id].region_addr;
	for (ptr = block; ptr < block + memregion_config_per_thread[thread_id].region_len; ptr += pgsize) {
		val = *ptr;
	}
}

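/* Worker thread body: check in as ready, park on start_cvar until the main
 * thread broadcasts, fault the assigned pages, then bump the finished count
 * and wake the main thread if this was the last worker. */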
static void *
thread_setup(void *arg)
{
	int my_index = *((int *)arg);

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&ready_thread_count_lock), "pthread_mutex_lock");
	ready_thread_count++;
	if (ready_thread_count == num_threads) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_signal(&threads_ready_cvar), "pthread_cond_signal");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&start_cvar, &ready_thread_count_lock), "pthread_cond_wait");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&ready_thread_count_lock), "pthread_mutex_unlock");

	fault_pages(my_index);

	/* Up the finished count */
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&finished_thread_count_lock), "pthread_mutex_lock");
	finished_thread_count++;
	if (finished_thread_count == num_threads) {
		/* All the threads are done. Wake up the main thread */
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_signal(&threads_finished_cvar), "pthread_cond_signal");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&finished_thread_count_lock), "pthread_mutex_unlock");
	return NULL;
}

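/* Spawns the workers, waits for all of them to check in, and then times the
 * interval from the start broadcast until the last worker reports done. */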
static void
execute_threads(void)
{
	int thread_index, thread_retval;
	int *thread_indices;
	void *thread_retval_ptr = &thread_retval;
	pthread_t *threads;

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&threads_ready_cvar, NULL), "pthread_cond_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&start_cvar, NULL), "pthread_cond_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_init(&ready_thread_count_lock, NULL), "pthread_mutex_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&threads_finished_cvar, NULL), "pthread_cond_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_init(&finished_thread_count_lock, NULL), "pthread_mutex_init");
	ready_thread_count = 0;
	finished_thread_count = 0;

	threads = (pthread_t *)malloc(sizeof(*threads) * (size_t)num_threads);
	thread_indices = (int *)malloc(sizeof(*thread_indices) * (size_t)num_threads);
	for (thread_index = 0; thread_index < num_threads; thread_index++) {
		thread_indices[thread_index] = thread_index;
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_create(&threads[thread_index], NULL,
		    thread_setup, (void *)&thread_indices[thread_index]), "pthread_create");
	}

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&ready_thread_count_lock), "pthread_mutex_lock");
	while (ready_thread_count != num_threads) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&threads_ready_cvar, &ready_thread_count_lock),
		    "pthread_cond_wait");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&ready_thread_count_lock), "pthread_mutex_unlock");

	T_STAT_MEASURE(runtime) {
		/* Ungate the threads */
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_broadcast(&start_cvar), "pthread_cond_broadcast");
		/* Wait for the threads to finish */
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&finished_thread_count_lock), "pthread_mutex_lock");
		while (finished_thread_count != num_threads) {
			T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&threads_finished_cvar, &finished_thread_count_lock), "pthread_cond_wait");
		}
		/* Drop the lock so the counter can be reused on the next iteration. */
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&finished_thread_count_lock), "pthread_mutex_unlock");
	};

	/* Join the threads */
	for (thread_index = 0; thread_index < num_threads; thread_index++) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_join(threads[thread_index], &thread_retval_ptr),
		    "pthread_join");
	}

	free(threads);
	free(thread_indices);
}

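/* Runs one (fault type, mapping variant) configuration, re-mapping and
 * re-faulting the memory until the timing statistic stabilizes, then reports
 * runtime and throughput. */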
static void
run_test(int fault_type, int mapping_variant, size_t memsize)
{
	char metric_str[32];
	size_t num_pages;
	size_t sysctl_size = sizeof(pgsize);
	int ret = sysctlbyname("vm.pagesize", &pgsize, &sysctl_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl vm.pagesize failed");

	num_pages = memsize / pgsize;

	T_QUIET; T_ASSERT_LT(fault_type, NUM_FAULT_TYPES, "invalid test type");
	T_QUIET; T_ASSERT_LT(mapping_variant, NUM_MAPPING_VARIANTS, "invalid mapping variant");
	T_QUIET; T_ASSERT_GT(num_threads, 0, "num_threads <= 0");
	T_QUIET; T_ASSERT_GT((int)num_pages / num_threads, 0, "num_pages/num_threads <= 0");

	T_LOG("No. of cpus: %d", get_ncpu());
	T_LOG("No. of threads: %d", num_threads);
	T_LOG("No. of pages: %zu", num_pages);
	T_LOG("Pagesize: %zu", pgsize);
	T_LOG("Allocation size: %zu MB", memsize / (1024 * 1024));
	T_LOG("Mapping variant: %s", variant_str[mapping_variant]);

	snprintf(metric_str, sizeof(metric_str), "Runtime-%s", variant_str[mapping_variant]);
	runtime = dt_stat_time_create(metric_str);

	while (!dt_stat_stable(runtime)) {
		map_mem_regions(fault_type, mapping_variant, memsize);
		execute_threads();
		unmap_mem_regions(mapping_variant, memsize);
	}

	dt_stat_finalize(runtime);
	T_LOG("Throughput-%s (MB/s): %lf\n\n", variant_str[mapping_variant], (double)memsize / (1024 * 1024) / dt_stat_mean((dt_stat_t)runtime));
}

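/* Applies the NTHREADS, MEMSIZEMB, and VARIANT environment overrides (used by
 * automation such as iOSMark), then runs either the requested variant or all
 * of them. */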
static void
setup_and_run_test(int fault_type, int threads)
{
	int i, mapping_variant;
	size_t memsize;
	char *e;

	mapping_variant = VARIANT_DEFAULT;
	memsize = MEMSIZE;
	num_threads = threads;

	if ((e = getenv("NTHREADS"))) {
		if (threads == 1) {
			T_SKIP("Custom environment variables specified. Skipping single-threaded version.");
		}
		num_threads = (int)strtol(e, NULL, 0);
	}

	if ((e = getenv("MEMSIZEMB"))) {
		memsize = (size_t)strtol(e, NULL, 0) * 1024 * 1024;
	}

	if ((e = getenv("VARIANT"))) {
		mapping_variant = (int)strtol(e, NULL, 0);
		run_test(fault_type, mapping_variant, memsize);
	} else {
		for (i = VARIANT_DEFAULT; i < NUM_MAPPING_VARIANTS; i++) {
			run_test(fault_type, i, memsize);
		}
	}

	T_END;
}

T_DECL(read_soft_fault,
    "Read soft faults (single thread)")
{
	setup_and_run_test(SOFT_FAULT, 1);
}

T_DECL(read_soft_fault_multithreaded,
    "Read soft faults (multi-threaded)")
{
	char *e;
	int nthreads;

	/* iOSMark passes in the no. of threads via an env. variable */
	if ((e = getenv("DT_STAT_NTHREADS"))) {
		nthreads = (int)strtol(e, NULL, 0);
	} else {
		nthreads = get_ncpu();
		if (nthreads == 1) {
			T_SKIP("Skipping multi-threaded test on single core device.");
		}
	}
	setup_and_run_test(SOFT_FAULT, nthreads);
}

T_DECL(zero_fill_fault,
    "Zero fill faults (single thread)")
{
	setup_and_run_test(ZERO_FILL, 1);
}

T_DECL(zero_fill_fault_multithreaded,
    "Zero fill faults (multi-threaded)")
{
	char *e;
	int nthreads;

	/* iOSMark passes in the no. of threads via an env. variable */
	if ((e = getenv("DT_STAT_NTHREADS"))) {
		nthreads = (int)strtol(e, NULL, 0);
	} else {
		nthreads = get_ncpu();
		if (nthreads == 1) {
			T_SKIP("Skipping multi-threaded test on single core device.");
		}
	}
	setup_and_run_test(ZERO_FILL, nthreads);
}