/*
 * Benchmark VM fault throughput.
 * This test faults memory for a configurable amount of time across a
 * configurable number of threads. It currently measures only zero-fill
 * faults and supports two variants:
 * 1. Each thread gets its own vm objects to fault in
 * 2. Threads share vm objects
 *
 * We'll add more fault types as we identify problematic user-facing workloads
 * in macro benchmarks.
 *
 * Throughput is reported as pages / second using both wall time and cpu time.
 * CPU time is a more reliable metric for regression testing, but wall time can
 * highlight blocking in the VM.
 *
 * Running this benchmark directly is not recommended.
 * Use fault_throughput.lua, which provides a nicer interface and outputs
 * perfdata.
 */
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>

#include <sys/mman.h>
#include <sys/types.h>
#include <sys/sysctl.h>

/*
 * TODO: Make this benchmark runnable on linux so we can do a perf comparison.
 * We're mostly using POSIX APIs, but we'll need to replace
 * the sysctls with the /proc equivalents, and replace clock_gettime_nsec_np
 * with the linux equivalent.
 */
#include <mach/mach.h>

#include <TargetConditionals.h>

#include <pthread.h>
#include <stdatomic.h>

#include "vm/perf_helpers.h"

#if (TARGET_OS_OSX || TARGET_OS_SIMULATOR)
/*
 * On non-embedded platforms we coalesce vm objects up to 128 MB, so
 * we make the objects 128 MB on those platforms to ensure they're not
 * merged with anything else.
 */
static const size_t kVmObjectSize = 128 * (1UL << 20);
#else
/*
 * Embedded platforms don't coalesce vm objects. This number
 * needs to be big enough that faulting it in dwarfs the cost of dequeuing
 * it from the work queue, but can't be too large or else we won't be able
 * to allocate one per thread in the separate-objects benchmark.
 */
static const size_t kVmObjectSize = 4 * (1UL << 20);
#endif /* (TARGET_OS_OSX || TARGET_OS_SIMULATOR) */
static const clockid_t kWallTimeClock = CLOCK_MONOTONIC_RAW;
static const clockid_t kThreadCPUTimeClock = CLOCK_THREAD_CPUTIME_ID;
/* These globals are set dynamically during test setup based on sysctls. */
static uint64_t kCacheLineSize = 0;
/* The VM page size */
static size_t kPageSize = 0;


typedef struct fault_buffer {
	unsigned char* fb_start; /* The start of this buffer. */
	size_t fb_size; /* The size of this buffer in bytes. */
} fault_buffer_t;

typedef enum test_variant {
	VARIANT_SEPARATE_VM_OBJECTS,
	VARIANT_SHARE_VM_OBJECTS
} test_variant_t;

typedef struct test_globals {
	/* This lock protects: tg_cv, tg_running_count, tg_done, tg_current_iteration, and tg_iterations_completed. */
	pthread_mutex_t tg_lock;
	pthread_cond_t tg_cv;
	/* The number of currently running threads */
	unsigned int tg_running_count;
	/* Set during cleanup to indicate that the benchmark is over. */
	bool tg_done;
	size_t tg_current_iteration;
	size_t tg_iterations_completed;
	unsigned int tg_num_threads;
	test_variant_t tg_variant;
	/*
	 * An array of memory objects to fault in.
	 * This is basically a workqueue of
	 * contiguous chunks of memory that the worker threads
	 * will fault in.
	 */
	fault_buffer_t *tg_fault_buffer_arr;
	size_t tg_fault_buffer_arr_length;
	/*
	 * To avoid false sharing, we pad the test globals with an extra cache line and place the atomic
	 * next_fault_buffer_index size_t after the cache line.
	 */
	__unused char padding[];
	/*
	 * This field is directly after the padding buffer.
	 * It is used to synchronize access to tg_fault_buffer_arr.
	 */
	//_Atomic size_t tg_next_fault_buffer_index;
} test_globals_t;

static const char* kSeparateObjectsArgument = "separate-objects";
static const char* kShareObjectsArgument = "share-objects";

/* Arguments parsed from the command line */
typedef struct test_args {
	uint32_t n_threads;
	uint64_t duration_seconds;
	test_variant_t variant;
	bool verbose;
} test_args_t;

/* Get a (wall-time) timestamp in nanoseconds */
static uint64_t get_timestamp_ns(void);
/* Get the number of cpus on this device. */
static unsigned int get_ncpu(void);
/*
 * Fault in the pages in the given buffer.
 */
static void fault_pages(fault_buffer_t *buffer, size_t stride);
/* Get a unique fault buffer from the global work queue. */
static fault_buffer_t *get_fault_buffer(test_globals_t* globals);
/*
 * Grabs buffers from the global test structure and faults them in, using this
 * test variant's stride, until there are no more buffers to grab.
 * Returns the number of microseconds spent on-cpu.
 */
static uint64_t grab_and_fault_pages(test_globals_t* globals);

static bool worker_thread_iteration_setup(size_t current_iteration, test_globals_t *globals);
static void worker_thread_iteration_complete(test_globals_t *globals);

static void parse_arguments(int argc, char **argv, test_args_t *args);
/*
 * Sets up the test globals and spawns the background threads to do the faults.
 * Returns an array of size `num_threads` containing the thread ids of the
 * spawned threads.
 */
static pthread_t* setup_test(test_globals_t *globals, const test_args_t *args, size_t memory_size, bool verbose);
static test_globals_t *allocate_test_globals(void);
/* Initializes variables in the globals array. */
static void init_globals(test_globals_t *globals, const test_args_t *args);
static inline _Atomic size_t *next_fault_buffer_index_ptr(test_globals_t *globals);
/*
 * Called on the main thread.
 * Waits for the background threads to be ready, sets up the memory objects,
 * and then starts a faulting iteration.
 * Returns the start (wall) time.
 */
static uint64_t start_iteration(test_globals_t* globals, test_variant_t variant, bool verbose);
/*
 * Called on the main thread.
 * Waits for the background threads to complete the iteration and cleans up.
 * Returns the wall time elapsed for the iteration, in nanoseconds.
 */
static uint64_t finish_iteration(test_globals_t *globals, uint64_t start_time);
/*
 * Called on the main thread.
 * Maps buffers and places them in the work queue.
 */
static void setup_memory(test_globals_t* globals, test_variant_t variant);
/*
 * Dump test results as a csv to stdout.
 * Use fault_throughput.lua to convert to perfdata.
 */
static void output_results(const test_globals_t *globals, double walltime_elapsed_seconds, double cputime_elapsed_seconds);
static void cleanup_test(test_globals_t *globals);
/*
 * Join the background threads and return the total microseconds
 * of cpu time spent faulting across all of the threads.
 * Takes ownership of the threads array and frees it.
 */
static uint64_t join_background_threads(test_globals_t *globals, pthread_t *threads);
static void unmap_fault_buffers(test_globals_t *globals);
/*
 * Get the stride between each vm object in the fault buffer array.
 */
static size_t fault_buffer_stride(const test_globals_t *globals);

int
main(int argc, char **argv)
{
	/* How much memory should the test consume (per-core on the system)? */
#if (TARGET_OS_OSX || TARGET_OS_SIMULATOR)
	static const size_t memory_per_core = kVmObjectSize;
#else
	static const size_t memory_per_core = 25 * (1UL << 20);
#endif /* (TARGET_OS_OSX || TARGET_OS_SIMULATOR) */
	const size_t kMemSize = memory_per_core * get_ncpu();
	test_globals_t *globals = allocate_test_globals();
	/* Total wall-time spent faulting in pages. */
	uint64_t wall_time_elapsed_ns = 0;
	/* Total cpu-time spent faulting in pages */
	uint64_t cpu_time_faulting_us = 0;
	uint64_t start_time_ns;
	test_args_t args;
	parse_arguments(argc, argv, &args);
	pthread_t* threads = setup_test(globals, &args, kMemSize, args.verbose);

	/* Keep doing more iterations until we've hit our (wall) time budget */
	while (wall_time_elapsed_ns < args.duration_seconds * kNumNanosecondsInSecond) {
		benchmark_log(args.verbose, "----Starting Iteration %lu----\n", globals->tg_current_iteration + 1);
		start_time_ns = start_iteration(globals, args.variant, args.verbose);
		wall_time_elapsed_ns += finish_iteration(globals, start_time_ns);
		benchmark_log(args.verbose, "----Completed Iteration %lu----\n", globals->tg_current_iteration);
	}

	benchmark_log(args.verbose, "Hit time budget\nJoining worker threads\n");
	cpu_time_faulting_us = join_background_threads(globals, threads);
	benchmark_log(args.verbose, "----End Test Output----\n");
	output_results(globals, (double) wall_time_elapsed_ns / kNumNanosecondsInSecond,
	    (double)cpu_time_faulting_us / kNumMicrosecondsInSecond);
	cleanup_test(globals);

	return 0;
}


/* The main loop for the worker threads. */
static void*
faulting_thread(void* arg)
{
	test_globals_t* globals = arg;
	uint64_t on_cpu_time_faulting = 0;
	size_t current_iteration = 1;
	while (true) {
		bool should_continue = worker_thread_iteration_setup(current_iteration, globals);
		if (!should_continue) {
			break;
		}
		on_cpu_time_faulting += grab_and_fault_pages(globals);
		worker_thread_iteration_complete(globals);
		current_iteration++;
	}
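	/*
	 * Report this thread's on-CPU faulting time (in microseconds) via the
	 * thread's exit value so join_background_threads can sum it up.
	 */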
	return (void*)on_cpu_time_faulting;
}

/*
 * Called on the worker threads before each iteration to synchronize the start
 * of the iteration with the other threads.
 * Returns true if the iteration should continue, and false if the test is over.
 */
static bool
worker_thread_iteration_setup(size_t current_iteration, test_globals_t *globals)
{
	bool should_continue = false;
	int ret = 0;
	// Gate on the other threads being ready to start
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	globals->tg_running_count++;
	if (globals->tg_running_count == globals->tg_num_threads) {
		// All the worker threads are running.
		// Wake up the main thread so that it can ungate the test.
		ret = pthread_cond_broadcast(&globals->tg_cv);
		assert(ret == 0);
	}
	/*
	 * The main thread will start this iteration by incrementing
	 * tg_current_iteration. Block until that happens.
	 * See start_iteration for the wakeup code.
	 */
	while (!globals->tg_done && globals->tg_current_iteration != current_iteration) {
		ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
		assert(ret == 0);
	}
	should_continue = !globals->tg_done;
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);
	return should_continue;
}

/*
 * Called on the worker threads before each iteration finishes to synchronize
 * with the other threads.
 */
static void
worker_thread_iteration_complete(test_globals_t *globals)
{
	int ret;
	// Mark ourselves as done and wait for the other threads to finish
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	globals->tg_running_count--;
	if (globals->tg_running_count == 0) {
		// We're the last one to finish. Mark this iteration as completed and wake everyone up.
		globals->tg_iterations_completed++;
		ret = pthread_cond_broadcast(&globals->tg_cv);
		assert(ret == 0);
	} else {
		// Others are running. Wait for them to finish.
		while (globals->tg_iterations_completed != globals->tg_current_iteration) {
			ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
			assert(ret == 0);
		}
	}
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);
}

static void
fault_pages(fault_buffer_t *buffer, size_t stride)
{
	volatile unsigned char val;
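	/*
	 * Touch one byte every stride bytes. The volatile read can't be
	 * optimized away, so each untouched page we hit takes a zero-fill fault.
	 */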
	for (unsigned char* ptr = buffer->fb_start; ptr < buffer->fb_start + buffer->fb_size; ptr += stride) {
		val = *ptr;
	}
}

static fault_buffer_t *
get_fault_buffer(test_globals_t* globals)
{
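	/*
	 * The fault buffer array is a lock-free work queue: each thread
	 * atomically claims the next unclaimed index until the array runs out.
	 */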
	size_t index = atomic_fetch_add_explicit(next_fault_buffer_index_ptr(globals), 1UL, memory_order_acq_rel);
	if (index < globals->tg_fault_buffer_arr_length) {
		return &globals->tg_fault_buffer_arr[index];
	}
	return NULL;
}

static uint64_t
grab_and_fault_pages(test_globals_t* globals)
{
	struct timespec start_time, end_time;
	uint64_t microseconds_faulting_on_cpu = 0;
	int ret;
	size_t stride = fault_buffer_stride(globals) * kPageSize;
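	/*
	 * Keep claiming buffers from the global work queue until it is empty,
	 * timing only the faulting itself with the per-thread CPU clock.
	 */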
	while (true) {
		fault_buffer_t *object = get_fault_buffer(globals);
		if (object == NULL) {
			break;
		}
		ret = clock_gettime(kThreadCPUTimeClock, &start_time);
		assert(ret == 0);

		fault_pages(object, stride);

		ret = clock_gettime(kThreadCPUTimeClock, &end_time);
		assert(ret == 0);
		microseconds_faulting_on_cpu += (unsigned long) timespec_difference_us(&end_time, &start_time);
	}
	return microseconds_faulting_on_cpu;
}

static uint64_t
start_iteration(test_globals_t* globals, test_variant_t variant, bool verbose)
{
	int ret;
	uint64_t start_time;
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	benchmark_log(verbose, "Waiting for workers to catch up before starting next iteration.\n");
	/* Wait until all the threads are ready to go to the next iteration */
	while (globals->tg_running_count != globals->tg_num_threads) {
		ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
		assert(ret == 0);
	}
	benchmark_log(verbose, "Workers are all caught up\n");
	setup_memory(globals, variant);
	benchmark_log(verbose, "Initialized data structures for iteration. Waking workers.\n");
	/* Grab a timestamp, tick the current iteration, and wake up the worker threads */
	start_time = get_timestamp_ns();
	globals->tg_current_iteration++;
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);
	ret = pthread_cond_broadcast(&globals->tg_cv);
	assert(ret == 0);
	return start_time;
}

static uint64_t
finish_iteration(test_globals_t* globals, uint64_t start_time)
{
	int ret;
	uint64_t end_time;
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	while (globals->tg_iterations_completed != globals->tg_current_iteration) {
		ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
		assert(ret == 0);
	}
	end_time = get_timestamp_ns();
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);
	unmap_fault_buffers(globals);
	return end_time - start_time;
}

static void
setup_memory(test_globals_t* globals, test_variant_t variant)
{
	size_t stride = fault_buffer_stride(globals);
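	/*
	 * Map one new vm object per stride-sized group of entries. In the shared
	 * variant the inner loop below fills the rest of the group with
	 * per-thread views into the same object.
	 */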
	for (size_t i = 0; i < globals->tg_fault_buffer_arr_length; i += stride) {
		fault_buffer_t *object = &globals->tg_fault_buffer_arr[i];
		object->fb_start = mmap_buffer(kVmObjectSize);
		object->fb_size = kVmObjectSize;
		if (variant == VARIANT_SHARE_VM_OBJECTS) {
			/*
			 * Insert another buffer into the work queue for each thread.
			 * Each buffer starts one page further into the vm object than the previous one.
			 * Since each thread strides by the number of threads * the page size, the
			 * threads won't fault in the same pages.
			 */
			for (size_t j = 1; j < globals->tg_num_threads; j++) {
				size_t offset = kPageSize * j;
				fault_buffer_t *offset_object = &globals->tg_fault_buffer_arr[i + j];
				offset_object->fb_start = object->fb_start + offset;
				offset_object->fb_size = object->fb_size - offset;
			}
		} else if (variant != VARIANT_SEPARATE_VM_OBJECTS) {
			fprintf(stderr, "Unknown test variant.\n");
			exit(2);
		}
	}
	atomic_store_explicit(next_fault_buffer_index_ptr(globals), 0, memory_order_release);
}

static void
unmap_fault_buffers(test_globals_t* globals)
{
	size_t stride = fault_buffer_stride(globals);
	for (size_t i = 0; i < globals->tg_fault_buffer_arr_length; i += stride) {
		fault_buffer_t *buffer = &globals->tg_fault_buffer_arr[i];
		int res = munmap(buffer->fb_start, buffer->fb_size);
		assert(res == 0);
	}
}

static test_globals_t *
allocate_test_globals(void)
{
	test_globals_t *globals = NULL;
	int ret;
	if (kCacheLineSize == 0) {
		size_t cachelinesize_size = sizeof(kCacheLineSize);
		ret = sysctlbyname("hw.cachelinesize", &kCacheLineSize, &cachelinesize_size, NULL, 0);
		assert(ret == 0);
		assert(kCacheLineSize > 0);
	}
	if (kPageSize == 0) {
		size_t pagesize_size = sizeof(kPageSize);
		ret = sysctlbyname("vm.pagesize", &kPageSize, &pagesize_size, NULL, 0);
		assert(ret == 0);
		assert(kPageSize > 0);
	}
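	/*
	 * Reserve room for the globals themselves, a cache line of padding, and
	 * the atomic work-queue index that lives just past the padding
	 * (see next_fault_buffer_index_ptr).
	 */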
	size_t test_globals_size = sizeof(test_globals_t) + kCacheLineSize + sizeof(_Atomic size_t);
	globals = malloc(test_globals_size);
	assert(globals != NULL);
	memset(globals, 0, test_globals_size);
	return globals;
}

static void
init_globals(test_globals_t *globals, const test_args_t *args)
{
	pthread_mutexattr_t mutex_attrs;
	pthread_condattr_t cond_attrs;
	int ret;
	memset(globals, 0, sizeof(test_globals_t));

	ret = pthread_mutexattr_init(&mutex_attrs);
	assert(ret == 0);
	ret = pthread_mutex_init(&globals->tg_lock, &mutex_attrs);
	assert(ret == 0);
	ret = pthread_condattr_init(&cond_attrs);
	assert(ret == 0);
	ret = pthread_cond_init(&globals->tg_cv, &cond_attrs);
	assert(ret == 0);
	ret = pthread_mutexattr_destroy(&mutex_attrs);
	assert(ret == 0);
	ret = pthread_condattr_destroy(&cond_attrs);
	assert(ret == 0);

	globals->tg_num_threads = args->n_threads;
	globals->tg_variant = args->variant;
}

static void
init_fault_buffer_arr(test_globals_t *globals, const test_args_t *args, size_t memory_size)
{
	if (args->variant == VARIANT_SEPARATE_VM_OBJECTS) {
		// This variant creates separate vm objects up to memory_size bytes total.
		globals->tg_fault_buffer_arr_length = memory_size / kVmObjectSize;
	} else if (args->variant == VARIANT_SHARE_VM_OBJECTS) {
		// This variant also creates separate vm objects up to memory_size bytes total,
		// but places one fault buffer per thread into each vm object.
		globals->tg_fault_buffer_arr_length = memory_size / kVmObjectSize * globals->tg_num_threads;
	} else {
		fprintf(stderr, "Unsupported test variant.\n");
		exit(2);
	}
	// It doesn't make sense to have more threads than elements in the work queue.
	// NB: Since we scale memory_size by ncpus, this can only happen if the user
	// tries to run the benchmark with many more threads than cores.
	assert(globals->tg_fault_buffer_arr_length >= globals->tg_num_threads);
	globals->tg_fault_buffer_arr = calloc(globals->tg_fault_buffer_arr_length, sizeof(fault_buffer_t));
	assert(globals->tg_fault_buffer_arr);
}

static pthread_t *
spawn_worker_threads(test_globals_t *globals, unsigned int num_threads)
{
	int ret;
	pthread_attr_t pthread_attrs;
	globals->tg_num_threads = num_threads;
	pthread_t* threads = malloc(sizeof(pthread_t) * num_threads);
	assert(threads);
	ret = pthread_attr_init(&pthread_attrs);
	assert(ret == 0);
	// Spawn the background threads
	for (unsigned int i = 0; i < num_threads; i++) {
		ret = pthread_create(threads + i, &pthread_attrs, faulting_thread, globals);
		assert(ret == 0);
	}
	ret = pthread_attr_destroy(&pthread_attrs);
	assert(ret == 0);
	return threads;
}

static pthread_t*
setup_test(test_globals_t *globals, const test_args_t *args, size_t memory_size, bool verbose)
{
	init_globals(globals, args);
	init_fault_buffer_arr(globals, args, memory_size);
	benchmark_log(verbose, "Initialized global data structures.\n");
	pthread_t *workers = spawn_worker_threads(globals, args->n_threads);
	benchmark_log(verbose, "Spawned workers.\n");
	return workers;
}

static uint64_t
join_background_threads(test_globals_t *globals, pthread_t *threads)
{
	// Set the done flag so that the background threads exit
	int ret;
	uint64_t total_cputime_spent_faulting = 0;
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	globals->tg_done = true;
	ret = pthread_cond_broadcast(&globals->tg_cv);
	assert(ret == 0);
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);

	// Join the background threads
	for (unsigned int i = 0; i < globals->tg_num_threads; i++) {
		uint64_t cputime_spent_faulting = 0;
		ret = pthread_join(threads[i], (void **)&cputime_spent_faulting);
		assert(ret == 0);
		total_cputime_spent_faulting += cputime_spent_faulting;
	}
	free(threads);
	return total_cputime_spent_faulting;
}

static void
cleanup_test(test_globals_t* globals)
{
	int ret;
	ret = pthread_mutex_destroy(&globals->tg_lock);
	assert(ret == 0);
	ret = pthread_cond_destroy(&globals->tg_cv);
	assert(ret == 0);
	free(globals->tg_fault_buffer_arr);
	free(globals);
}

static void
output_results(const test_globals_t* globals, double walltime_elapsed_seconds, double cputime_elapsed_seconds)
{
	size_t pgsize = 0;
	size_t sysctl_size = sizeof(pgsize);
	int ret = sysctlbyname("vm.pagesize", &pgsize, &sysctl_size, NULL, 0);
	assert(ret == 0);
	size_t num_pages = 0;
	double walltime_throughput, cputime_throughput;
	size_t stride = fault_buffer_stride(globals);
	for (size_t i = 0; i < globals->tg_fault_buffer_arr_length; i += stride) {
		num_pages += globals->tg_fault_buffer_arr[i].fb_size / pgsize;
	}
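	/* Every completed iteration faulted each of these pages exactly once. */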
	num_pages *= globals->tg_iterations_completed;
	walltime_throughput = num_pages / walltime_elapsed_seconds;
	cputime_throughput = num_pages / cputime_elapsed_seconds;
	printf("-----Results-----\n");
	printf("Throughput (pages / wall second), Throughput (pages / CPU second)\n");
	printf("%f,%f\n", walltime_throughput, cputime_throughput);
}

static void
print_help(char** argv)
{
	fprintf(stderr, "%s: [-v] <test-variant> duration num_threads\n", argv[0]);
	fprintf(stderr, "\ntest variants:\n");
	fprintf(stderr, " %s Fault in different vm objects in each thread.\n", kSeparateObjectsArgument);
	fprintf(stderr, " %s Share vm objects across faulting threads.\n", kShareObjectsArgument);
}

static uint64_t
get_timestamp_ns(void)
{
	return clock_gettime_nsec_np(kWallTimeClock);
}

static unsigned int
get_ncpu(void)
{
	int ncpu;
	size_t sysctl_size = sizeof(ncpu);
	int ret = sysctlbyname("hw.ncpu", &ncpu, &sysctl_size, NULL, 0);
	assert(ret == 0);
	return (unsigned int) ncpu;
}

static void
parse_arguments(int argc, char** argv, test_args_t *args)
{
	int current_argument = 1;
	memset(args, 0, sizeof(test_args_t));
	if (argc < 4 || argc > 6) {
		print_help(argv);
		exit(1);
	}
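	/* The optional -v flag must come before the test variant. */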
	if (argv[current_argument][0] == '-') {
		if (strcmp(argv[current_argument], "-v") == 0) {
			args->verbose = true;
		} else {
			fprintf(stderr, "Unknown argument %s\n", argv[current_argument]);
			print_help(argv);
			exit(1);
		}
		current_argument++;
	}
	if (strncasecmp(argv[current_argument], kSeparateObjectsArgument, strlen(kSeparateObjectsArgument)) == 0) {
		args->variant = VARIANT_SEPARATE_VM_OBJECTS;
	} else if (strncasecmp(argv[current_argument], kShareObjectsArgument, strlen(kShareObjectsArgument)) == 0) {
		args->variant = VARIANT_SHARE_VM_OBJECTS;
	} else {
		print_help(argv);
		exit(1);
	}
	current_argument++;

	long duration = strtol(argv[current_argument++], NULL, 10);
	if (duration == 0) {
		print_help(argv);
		exit(1);
	}
	long num_cores = strtol(argv[current_argument++], NULL, 10);
	if (num_cores == 0) {
		print_help(argv);
		exit(1);
	}
	assert(num_cores > 0 && num_cores <= get_ncpu());
	args->n_threads = (unsigned int) num_cores;
	args->duration_seconds = (unsigned long) duration;
}

static inline _Atomic size_t *
next_fault_buffer_index_ptr(test_globals_t *globals)
{
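	/*
	 * The atomic index lives one cache line past the end of the struct;
	 * allocate_test_globals reserves the extra space for it.
	 */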
	return (_Atomic size_t *)((char *)(globals + 1) + kCacheLineSize);
}

static size_t
fault_buffer_stride(const test_globals_t *globals)
{
	size_t stride;
	if (globals->tg_variant == VARIANT_SEPARATE_VM_OBJECTS) {
		stride = 1;
	} else if (globals->tg_variant == VARIANT_SHARE_VM_OBJECTS) {
		stride = globals->tg_num_threads;
	} else {
		fprintf(stderr, "Unknown variant\n");
		exit(-1);
	}
	return stride;
}