/*
 * Benchmark VM fault throughput.
 *
 * This test faults memory for a configurable amount of time across a
 * configurable number of threads. Currently it only measures zero-fill faults.
 * It supports two variants:
 * 1. Each thread gets its own vm objects to fault in.
 * 2. Threads share vm objects.
 *
 * We'll add more fault types as we identify problematic user-facing workloads
 * in macro benchmarks.
 *
 * Throughput is reported as pages / second using both wall time and CPU time.
 * CPU time is a more reliable metric for regression testing, but wall time can
 * highlight blocking in the VM.
 *
 * Running this benchmark directly is not recommended.
 * Use fault_throughput.lua, which provides a nicer interface and outputs
 * perfdata.
 */
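/*
 * Example invocation (binary name is illustrative; the supported harness is
 * fault_throughput.lua):
 *   ./fault_throughput separate-objects 10 4
 * runs the separate-objects variant for 10 seconds across 4 threads.
 */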
#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>

#include <sys/mman.h>
#include <sys/types.h>
#include <sys/sysctl.h>

/*
 * TODO: Make this benchmark runnable on linux so we can do a perf comparison.
 * We're mostly using POSIX APIs, but we'll need to replace
 * the sysctls with the /proc equivalents, and replace clock_gettime_nsec_np
 * with the linux equivalent.
 */
#include <mach/mach.h>

#include <TargetConditionals.h>

#include <stdatomic.h>

#include "vm/perf_helpers.h"
#if (TARGET_OS_OSX || TARGET_OS_SIMULATOR)
/*
 * On non-embedded platforms we coalesce vm objects up to 128 MB, so
 * we make the objects 128 MB on those platforms to ensure they're not
 * merged with anything else.
 */
const static size_t kVmObjectSize = 128 * (1UL << 20);
#else
/*
 * Embedded platforms don't coalesce vm objects. This number
 * needs to be big enough that faulting it in dwarfs the cost of dequeuing
 * it from the work queue, but can't be too large or else we won't be able
 * to allocate one per thread in the separate-objects benchmark.
 */
const static size_t kVmObjectSize = 4 * (1UL << 20);
#endif /* (TARGET_OS_OSX || TARGET_OS_SIMULATOR) */
static const clockid_t kWallTimeClock = CLOCK_MONOTONIC_RAW;
static const clockid_t kThreadCPUTimeClock = CLOCK_THREAD_CPUTIME_ID;
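/*
 * CLOCK_MONOTONIC_RAW is not slewed by NTP adjustments, and
 * CLOCK_THREAD_CPUTIME_ID charges time to the calling thread only, so
 * per-thread CPU time can be summed across the workers.
 */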
/* These globals are set dynamically during test setup based on sysctls. */
static uint64_t kCacheLineSize = 0;
/* The VM page size */
static size_t kPageSize = 0;
typedef struct fault_buffer {
	unsigned char* fb_start; /* The start of this buffer. */
	size_t fb_size; /* The size of this buffer in bytes. */
} fault_buffer_t;
typedef enum test_variant {
	VARIANT_SEPARATE_VM_OBJECTS,
	VARIANT_SHARE_VM_OBJECTS
} test_variant_t;
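/* The variant is selected by the <test-variant> command-line argument; see kSeparateObjectsArgument and kShareObjectsArgument below. */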
typedef struct test_globals {
	/* This lock protects: tg_cv, tg_running_count, tg_done, tg_current_iteration, and tg_iterations_completed. */
	pthread_mutex_t tg_lock;
	pthread_cond_t tg_cv;
	/* The number of currently running threads */
	unsigned int tg_running_count;
	/* Set during cleanup to indicate that the benchmark is over. */
	bool tg_done;
	size_t tg_current_iteration;
	size_t tg_iterations_completed;
	unsigned int tg_num_threads;
	test_variant_t tg_variant;
	/*
	 * An array of memory objects to fault in.
	 * This is basically a workqueue of contiguous chunks of memory
	 * that the worker threads will fault in.
	 */
	fault_buffer_t *tg_fault_buffer_arr;
	size_t tg_fault_buffer_arr_length;
	/*
	 * To avoid false sharing, we pad the test globals with an extra cache line and place the atomic
	 * next_fault_buffer_index size_t after the cache line.
	 */
	__unused char padding[];
	/*
	 * This field is directly after the padding buffer.
	 * It is used to synchronize access to tg_fault_buffer_arr.
	 */
	//_Atomic size_t tg_next_fault_buffer_index;
} test_globals_t;
static const char* kSeparateObjectsArgument = "separate-objects";
static const char* kShareObjectsArgument = "share-objects";
/* Arguments parsed from the command line */
typedef struct test_args {
	unsigned int n_threads;
	uint64_t duration_seconds;
	test_variant_t variant;
	bool verbose;
} test_args_t;
/* Get a (wall-time) timestamp in nanoseconds */
static uint64_t get_timestamp_ns(void);
/* Get the number of cpus on this device. */
static unsigned int get_ncpu(void);
/*
 * Fault in the pages in the given buffer.
 */
static void fault_pages(fault_buffer_t *buffer, size_t stride);
/* Get a unique fault buffer from the global work queue. */
static fault_buffer_t *get_fault_buffer(test_globals_t* globals);
/*
 * Grabs buffers from the global test structure and faults them in, using this
 * test variant's stride, until there are no more buffers to grab.
 * Returns the number of microseconds spent on-cpu.
 */
static uint64_t grab_and_fault_pages(test_globals_t* globals);
static bool worker_thread_iteration_setup(size_t current_iteration, test_globals_t *globals);
static void worker_thread_iteration_complete(test_globals_t *globals);
static void parse_arguments(int argc, char **argv, test_args_t *args);
/*
 * Sets up the test globals and spawns the background threads to do the faults.
 * Returns an array of size `args->n_threads` containing the ids of the
 * spawned threads.
 */
static pthread_t *setup_test(test_globals_t *globals, const test_args_t *args, size_t memory_size, bool verbose);
static test_globals_t *allocate_test_globals(void);
/* Initializes variables in the globals structure. */
static void init_globals(test_globals_t *globals, const test_args_t *args);
static inline _Atomic size_t *next_fault_buffer_index_ptr(test_globals_t *globals);
/*
 * Called on the main thread.
 * Waits for the background threads to be ready, sets up the memory objects,
 * and then starts a faulting iteration.
 * Returns the start (wall) time.
 */
static uint64_t start_iteration(test_globals_t* globals, test_variant_t variant, bool verbose);
/*
 * Called on the main thread.
 * Waits for the background threads to complete the iteration and cleans up.
 * Returns the wall time elapsed during the iteration in nanoseconds.
 */
static uint64_t finish_iteration(test_globals_t *globals, uint64_t start_time);
/*
 * Called on the main thread.
 * Maps buffers and places them in the work queue.
 */
static void setup_memory(test_globals_t* globals, test_variant_t variant);
/*
 * Dump test results as a csv to stdout.
 * Use fault_throughput.lua to convert to perfdata.
 */
static void output_results(const test_globals_t *globals, double walltime_elapsed_seconds, double cputime_elapsed_seconds);
static void cleanup_test(test_globals_t *globals);
/*
 * Join the background threads and return the total microseconds
 * of cpu time spent faulting across all of the threads.
 * Takes ownership of the threads array and frees it.
 */
static uint64_t join_background_threads(test_globals_t *globals, pthread_t *threads);
static void unmap_fault_buffers(test_globals_t *globals);
/*
 * Get the stride between each vm object in the fault buffer array.
 */
static size_t fault_buffer_stride(const test_globals_t *globals);
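/*
 * Entry point. Parses arguments, spawns the worker threads, runs faulting
 * iterations until the wall-time budget is exhausted, then joins the workers
 * and reports throughput.
 */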
int
main(int argc, char **argv)
{
	/* How much memory should the test consume (per-core on the system)? */
#if (TARGET_OS_OSX || TARGET_OS_SIMULATOR)
	static const size_t memory_per_core = kVmObjectSize;
#else
	static const size_t memory_per_core = 25 * (1UL << 20);
#endif /* (TARGET_OS_OSX || TARGET_OS_SIMULATOR) */
	const size_t kMemSize = memory_per_core * get_ncpu();
	test_globals_t *globals = allocate_test_globals();
	/* Total wall-time spent faulting in pages. */
	uint64_t wall_time_elapsed_ns = 0;
	/* Total cpu-time spent faulting in pages */
	uint64_t cpu_time_faulting_us = 0;
	uint64_t start_time_ns;
	test_args_t args;
	parse_arguments(argc, argv, &args);
	pthread_t* threads = setup_test(globals, &args, kMemSize, args.verbose);
	/* Keep doing more iterations until we've hit our (wall) time budget */
	while (wall_time_elapsed_ns < args.duration_seconds * kNumNanosecondsInSecond) {
		benchmark_log(args.verbose, "----Starting Iteration %lu-----\n", globals->tg_current_iteration + 1);
		start_time_ns = start_iteration(globals, args.variant, args.verbose);
		wall_time_elapsed_ns += finish_iteration(globals, start_time_ns);
		benchmark_log(args.verbose, "----Completed Iteration %lu----\n", globals->tg_current_iteration);
	}

	benchmark_log(args.verbose, "Hit time budget\nJoining worker threads\n");
	cpu_time_faulting_us = join_background_threads(globals, threads);
	benchmark_log(args.verbose, "----End Test Output----\n");
	output_results(globals, (double) wall_time_elapsed_ns / kNumNanosecondsInSecond,
	    (double) cpu_time_faulting_us / kNumMicrosecondsInSecond);
	cleanup_test(globals);

	return 0;
}
/* The main loop for the worker threads. */
static void *
faulting_thread(void* arg)
{
	test_globals_t* globals = arg;
	uint64_t on_cpu_time_faulting = 0;
	size_t current_iteration = 1;
	while (true) {
		bool should_continue = worker_thread_iteration_setup(current_iteration, globals);
		if (!should_continue) {
			break;
		}
		on_cpu_time_faulting += grab_and_fault_pages(globals);
		worker_thread_iteration_complete(globals);
		current_iteration++;
	}
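	/*
	 * The accumulated on-cpu time is returned through the thread's exit
	 * value; join_background_threads() casts it back to a uint64_t.
	 */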
	return (void*)on_cpu_time_faulting;
}
/*
 * Called on the worker threads before each iteration to synchronize this
 * iteration's start with the other threads.
 * Returns true if the iteration should continue, and false if the test is over.
 */
static bool
worker_thread_iteration_setup(size_t current_iteration, test_globals_t *globals)
{
	bool should_continue = false;
	int ret;
	// Gate on the other threads being ready to start
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	globals->tg_running_count++;
	if (globals->tg_running_count == globals->tg_num_threads) {
		// All the worker threads are running.
		// Wake up the main thread so that it can ungate the test.
		ret = pthread_cond_broadcast(&globals->tg_cv);
		assert(ret == 0);
	}
	/*
	 * The main thread will start this iteration by incrementing
	 * tg_current_iteration. Block until that happens.
	 * See start_iteration for the wakeup code.
	 */
	while (!globals->tg_done && globals->tg_current_iteration != current_iteration) {
		ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
		assert(ret == 0);
	}
	should_continue = !globals->tg_done;
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);
	return should_continue;
}
/*
 * Called on the worker threads as each iteration finishes to synchronize
 * with the other threads.
 */
static void
worker_thread_iteration_complete(test_globals_t *globals)
{
	int ret;
	// Mark ourselves as done and wait for the other threads to finish
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	globals->tg_running_count--;
	if (globals->tg_running_count == 0) {
		// We're the last one to finish. Mark this iteration as completed and wake everyone up.
		globals->tg_iterations_completed++;
		ret = pthread_cond_broadcast(&globals->tg_cv);
		assert(ret == 0);
	} else {
		// Others are running. Wait for them to finish.
		while (globals->tg_iterations_completed != globals->tg_current_iteration) {
			ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
			assert(ret == 0);
		}
	}
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);
}
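/*
 * val is volatile, so the compiler cannot elide the assignments below; that
 * forces each load of *ptr to happen and take the fault.
 */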
static void
fault_pages(fault_buffer_t *buffer, size_t stride)
{
	volatile unsigned char val;
	for (unsigned char* ptr = buffer->fb_start; ptr < buffer->fb_start + buffer->fb_size; ptr += stride) {
		val = *ptr;
	}
}
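/*
 * The work queue is just an atomic index into tg_fault_buffer_arr;
 * fetch_add hands each caller a unique slot without taking a lock.
 */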
static fault_buffer_t *
get_fault_buffer(test_globals_t* globals)
{
	size_t index = atomic_fetch_add_explicit(next_fault_buffer_index_ptr(globals), 1UL, memory_order_acq_rel);
	if (index < globals->tg_fault_buffer_arr_length) {
		return &globals->tg_fault_buffer_arr[index];
	}
	return NULL;
}
static uint64_t
grab_and_fault_pages(test_globals_t* globals)
{
	struct timespec start_time, end_time;
	uint64_t microseconds_faulting_on_cpu = 0;
	int ret;
	size_t stride = fault_buffer_stride(globals) * kPageSize;
	while (true) {
		fault_buffer_t *object = get_fault_buffer(globals);
		if (object == NULL) {
			break;
		}
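		/*
		 * Only the faulting loop is timed, so the cost of dequeuing
		 * buffers is excluded from the CPU-time metric.
		 */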
		ret = clock_gettime(kThreadCPUTimeClock, &start_time);
		assert(ret == 0);

		fault_pages(object, stride);

		ret = clock_gettime(kThreadCPUTimeClock, &end_time);
		assert(ret == 0);
		microseconds_faulting_on_cpu += (unsigned long) timespec_difference_us(&end_time, &start_time);
	}
	return microseconds_faulting_on_cpu;
}
static uint64_t
start_iteration(test_globals_t* globals, test_variant_t variant, bool verbose)
{
	uint64_t start_time;
	int ret;
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	benchmark_log(verbose, "Waiting for workers to catch up before starting next iteration.\n");
	/* Wait until all the threads are ready to go to the next iteration */
	while (globals->tg_running_count != globals->tg_num_threads) {
		ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
		assert(ret == 0);
	}
	benchmark_log(verbose, "Workers are all caught up\n");
	setup_memory(globals, variant);
	benchmark_log(verbose, "Initialized data structures for iteration. Waking workers.\n");
	/* Grab a timestamp, tick the current iteration, and wake up the worker threads */
	start_time = get_timestamp_ns();
	globals->tg_current_iteration++;
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);
	ret = pthread_cond_broadcast(&globals->tg_cv);
	assert(ret == 0);
	return start_time;
}
static uint64_t
finish_iteration(test_globals_t* globals, uint64_t start_time)
{
	uint64_t end_time;
	int ret;
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	while (globals->tg_iterations_completed != globals->tg_current_iteration) {
		ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
		assert(ret == 0);
	}
	end_time = get_timestamp_ns();
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);
	unmap_fault_buffers(globals);

	return end_time - start_time;
}
static void
setup_memory(test_globals_t* globals, test_variant_t variant)
{
	size_t stride = fault_buffer_stride(globals);
	for (size_t i = 0; i < globals->tg_fault_buffer_arr_length; i += stride) {
		fault_buffer_t *object = &globals->tg_fault_buffer_arr[i];
		object->fb_start = mmap_buffer(kVmObjectSize);
		object->fb_size = kVmObjectSize;
		if (variant == VARIANT_SHARE_VM_OBJECTS) {
			/*
			 * Insert another buffer into the work queue for each thread.
			 * Each buffer starts 1 page past where the previous buffer started into the vm object.
			 * Since each thread strides by the number of threads * the page size, they won't fault in the same pages.
			 */
			for (size_t j = 1; j < globals->tg_num_threads; j++) {
				size_t offset = kPageSize * j;
				fault_buffer_t *offset_object = &globals->tg_fault_buffer_arr[i + j];
				offset_object->fb_start = object->fb_start + offset;
				offset_object->fb_size = object->fb_size - offset;
			}
		} else if (variant != VARIANT_SEPARATE_VM_OBJECTS) {
			fprintf(stderr, "Unknown test variant.\n");
			exit(2);
		}
	}
	atomic_store_explicit(next_fault_buffer_index_ptr(globals), 0, memory_order_release);
}
static void
unmap_fault_buffers(test_globals_t* globals)
{
	size_t stride = fault_buffer_stride(globals);
	for (size_t i = 0; i < globals->tg_fault_buffer_arr_length; i += stride) {
		fault_buffer_t *buffer = &globals->tg_fault_buffer_arr[i];
		int res = munmap(buffer->fb_start, buffer->fb_size);
		assert(res == 0);
	}
}
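/*
 * The globals are followed by a cache line of padding and then the atomic
 * work-queue index, so the allocation is sized for all three; see
 * next_fault_buffer_index_ptr().
 */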
static test_globals_t *
allocate_test_globals(void)
{
	test_globals_t *globals = NULL;
	int ret;
	if (kCacheLineSize == 0) {
		size_t cachelinesize_size = sizeof(kCacheLineSize);
		ret = sysctlbyname("hw.cachelinesize", &kCacheLineSize, &cachelinesize_size, NULL, 0);
		assert(ret == 0);
		assert(kCacheLineSize > 0);
	}
	if (kPageSize == 0) {
		size_t pagesize_size = sizeof(kPageSize);
		ret = sysctlbyname("vm.pagesize", &kPageSize, &pagesize_size, NULL, 0);
		assert(ret == 0);
		assert(kPageSize > 0);
	}
	size_t test_globals_size = sizeof(test_globals_t) + kCacheLineSize + sizeof(_Atomic size_t);
	globals = malloc(test_globals_size);
	assert(globals != NULL);
	memset(globals, 0, test_globals_size);
	return globals;
}
static void
init_globals(test_globals_t *globals, const test_args_t *args)
{
	pthread_mutexattr_t mutex_attrs;
	pthread_condattr_t cond_attrs;
	int ret;
	memset(globals, 0, sizeof(test_globals_t));

	ret = pthread_mutexattr_init(&mutex_attrs);
	assert(ret == 0);
	ret = pthread_mutex_init(&globals->tg_lock, &mutex_attrs);
	assert(ret == 0);
	ret = pthread_condattr_init(&cond_attrs);
	assert(ret == 0);
	ret = pthread_cond_init(&globals->tg_cv, &cond_attrs);
	assert(ret == 0);
	ret = pthread_mutexattr_destroy(&mutex_attrs);
	assert(ret == 0);
	ret = pthread_condattr_destroy(&cond_attrs);
	assert(ret == 0);

	globals->tg_num_threads = args->n_threads;
	globals->tg_variant = args->variant;
}
static void
init_fault_buffer_arr(test_globals_t *globals, const test_args_t *args, size_t memory_size)
{
	if (args->variant == VARIANT_SEPARATE_VM_OBJECTS) {
		// This variant creates separate vm objects up to memory_size bytes total.
		globals->tg_fault_buffer_arr_length = memory_size / kVmObjectSize;
	} else if (args->variant == VARIANT_SHARE_VM_OBJECTS) {
		// This variant also creates separate vm objects up to memory_size bytes total,
		// and places a pointer into each vm object for each thread.
		globals->tg_fault_buffer_arr_length = memory_size / kVmObjectSize * globals->tg_num_threads;
	} else {
		fprintf(stderr, "Unsupported test variant.\n");
		exit(2);
	}
	// It doesn't make sense to have more threads than elements in the work queue.
	// NB: Since we scale memory_size by ncpus, this can only happen if the user
	// tries to run the benchmark with many more threads than cores.
	assert(globals->tg_fault_buffer_arr_length >= globals->tg_num_threads);
	globals->tg_fault_buffer_arr = calloc(globals->tg_fault_buffer_arr_length, sizeof(fault_buffer_t));
	assert(globals->tg_fault_buffer_arr);
}
static pthread_t *
spawn_worker_threads(test_globals_t *globals, unsigned int num_threads)
{
	int ret;
	pthread_attr_t pthread_attrs;
	globals->tg_num_threads = num_threads;
	pthread_t* threads = malloc(sizeof(pthread_t) * num_threads);
	assert(threads);
	ret = pthread_attr_init(&pthread_attrs);
	assert(ret == 0);
	// Spawn the background threads
	for (unsigned int i = 0; i < num_threads; i++) {
		ret = pthread_create(threads + i, &pthread_attrs, faulting_thread, globals);
		assert(ret == 0);
	}
	ret = pthread_attr_destroy(&pthread_attrs);
	assert(ret == 0);
	return threads;
}
static pthread_t *
setup_test(test_globals_t *globals, const test_args_t *args, size_t memory_size, bool verbose)
{
	init_globals(globals, args);
	init_fault_buffer_arr(globals, args, memory_size);
	benchmark_log(verbose, "Initialized global data structures.\n");
	pthread_t *workers = spawn_worker_threads(globals, args->n_threads);
	benchmark_log(verbose, "Spawned workers.\n");
	return workers;
}
static uint64_t
join_background_threads(test_globals_t *globals, pthread_t *threads)
{
	// Set the done flag so that the background threads exit
	int ret;
	uint64_t total_cputime_spent_faulting = 0;
	ret = pthread_mutex_lock(&globals->tg_lock);
	assert(ret == 0);
	globals->tg_done = true;
	ret = pthread_cond_broadcast(&globals->tg_cv);
	assert(ret == 0);
	ret = pthread_mutex_unlock(&globals->tg_lock);
	assert(ret == 0);

	// Join the background threads
	for (unsigned int i = 0; i < globals->tg_num_threads; i++) {
		uint64_t cputime_spent_faulting = 0;
		ret = pthread_join(threads[i], (void **)&cputime_spent_faulting);
		assert(ret == 0);
		total_cputime_spent_faulting += cputime_spent_faulting;
	}
	free(threads);
	return total_cputime_spent_faulting;
}
static void
cleanup_test(test_globals_t* globals)
{
	int ret;
	ret = pthread_mutex_destroy(&globals->tg_lock);
	assert(ret == 0);
	ret = pthread_cond_destroy(&globals->tg_cv);
	assert(ret == 0);
	free(globals->tg_fault_buffer_arr);
	free(globals);
}
static void
output_results(const test_globals_t* globals, double walltime_elapsed_seconds, double cputime_elapsed_seconds)
{
	size_t pgsize;
	size_t sysctl_size = sizeof(pgsize);
	int ret = sysctlbyname("vm.pagesize", &pgsize, &sysctl_size, NULL, 0);
	assert(ret == 0);
	size_t num_pages = 0;
	double walltime_throughput, cputime_throughput;
	size_t stride = fault_buffer_stride(globals);
	for (size_t i = 0; i < globals->tg_fault_buffer_arr_length; i += stride) {
		num_pages += globals->tg_fault_buffer_arr[i].fb_size / pgsize;
	}
	num_pages *= globals->tg_iterations_completed;
	walltime_throughput = num_pages / walltime_elapsed_seconds;
	cputime_throughput = num_pages / cputime_elapsed_seconds;
	printf("-----Results-----\n");
	printf("Throughput (pages / wall second), Throughput (pages / CPU second)\n");
	printf("%f,%f\n", walltime_throughput, cputime_throughput);
}
static void
print_help(char** argv)
{
	fprintf(stderr, "%s: <test-variant> [-v] duration num_threads\n", argv[0]);
	fprintf(stderr, "\ntest variants:\n");
	fprintf(stderr, "  %s  Fault in different vm objects in each thread.\n", kSeparateObjectsArgument);
	fprintf(stderr, "  %s  Share vm objects across faulting threads.\n", kShareObjectsArgument);
}
static uint64_t
get_timestamp_ns(void)
{
	return clock_gettime_nsec_np(kWallTimeClock);
}
static unsigned int
get_ncpu(void)
{
	int ncpu;
	size_t sysctl_size = sizeof(ncpu);
	int ret = sysctlbyname("hw.ncpu", &ncpu, &sysctl_size, NULL, 0);
	assert(ret == 0);
	return (unsigned int) ncpu;
}
static void
parse_arguments(int argc, char** argv, test_args_t *args)
{
	int current_argument = 1;
	memset(args, 0, sizeof(test_args_t));
	if (argc < 4 || argc > 6) {
		print_help(argv);
		exit(1);
	}
	if (argv[current_argument][0] == '-') {
		if (strcmp(argv[current_argument], "-v") == 0) {
			args->verbose = true;
		} else {
			fprintf(stderr, "Unknown argument %s\n", argv[current_argument]);
			print_help(argv);
			exit(1);
		}
		current_argument++;
	}
	if (strncasecmp(argv[current_argument], kSeparateObjectsArgument, strlen(kSeparateObjectsArgument)) == 0) {
		args->variant = VARIANT_SEPARATE_VM_OBJECTS;
	} else if (strncasecmp(argv[current_argument], kShareObjectsArgument, strlen(kShareObjectsArgument)) == 0) {
		args->variant = VARIANT_SHARE_VM_OBJECTS;
	} else {
		print_help(argv);
		exit(1);
	}
	current_argument++;

	long duration = strtol(argv[current_argument++], NULL, 10);
	if (duration == 0) {
		print_help(argv);
		exit(1);
	}
	long num_cores = strtol(argv[current_argument++], NULL, 10);
	if (num_cores == 0) {
		print_help(argv);
		exit(1);
	}
	assert(num_cores > 0 && num_cores <= get_ncpu());
	args->n_threads = (unsigned int) num_cores;
	args->duration_seconds = (unsigned long) duration;
}
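/*
 * globals + 1 points just past the test_globals_t struct; skipping a further
 * kCacheLineSize bytes lands on the atomic index allocated (and zeroed) in
 * allocate_test_globals().
 */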
static inline _Atomic size_t *
next_fault_buffer_index_ptr(test_globals_t *globals)
{
	return (_Atomic size_t *) (((ptrdiff_t)(globals + 1)) + (int64_t)kCacheLineSize);
}
static size_t
fault_buffer_stride(const test_globals_t *globals)
{
	size_t stride = 0;
	if (globals->tg_variant == VARIANT_SEPARATE_VM_OBJECTS) {
		stride = 1;
	} else if (globals->tg_variant == VARIANT_SHARE_VM_OBJECTS) {
		stride = globals->tg_num_threads;
	} else {
		fprintf(stderr, "Unknown variant\n");
		exit(2);
	}
	return stride;
}