tools/tests/zero-to-n/zero-to-n.c

   1 /*
   2  * Copyright (c) 2009 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 #include <unistd.h>
  29 #include <stdio.h>
  30 #include <math.h>
  31 #include <sys/wait.h>
  32 #include <sys/syscall.h>
  33 #include <sys/types.h>
  34 #include <sys/ptrace.h>
  35 #include <semaphore.h>
  36 #include <stdlib.h>
  37 #include <pthread.h>
  38 #include <fcntl.h>
  39 #include <errno.h>
  40 #include <string.h>
  41
  42 #include <libkern/OSAtomic.h>
  43
  44 #include <mach/mach_time.h>
  45 #include <mach/mach.h>
  46 #include <mach/task.h>
  47 #include <mach/semaphore.h>
  48
  49 typedef enum wake_type { WAKE_BROADCAST_ONESEM, WAKE_BROADCAST_PERTHREAD, WAKE_CHAIN } wake_type_t;
  50 typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY_FIXEDPRI } my_policy_type_t;
  51
  52 #define assert(truth, label) do { if(!(truth)) { printf("Thread %p: failure on line %d\n", pthread_self(), __LINE__); goto label; } } while (0)
  53
  54 #define CONSTRAINT_NANOS        (20000000ll)    /* 20 ms */
  55 #define COMPUTATION_NANOS       (10000000ll)    /* 10 ms */
  56 #define TRACEWORTHY_NANOS       (10000000ll)    /* 10 ms */
  57
  58 #if DEBUG
  59 #define debug_log(args...) printf(args)
  60 #else
  61 #define debug_log(args...) do { } while(0)
  62 #endif
  63
  64 /* Declarations */
  65 void*                   child_thread_func(void *arg);
  66 void                    print_usage();
  67 int                     thread_setup(int my_id);
  68 my_policy_type_t        parse_thread_policy(const char *str);
  69 int                     thread_finish_iteration();
  70
  71 /* Global variables (general) */
  72 int                     g_numthreads;
  73 wake_type_t             g_waketype;
  74 policy_t                g_policy;
  75 int                     g_iterations;
  76 struct mach_timebase_info g_mti;
  77 semaphore_t             g_main_sem;
  78 uint64_t                *g_thread_endtimes_abs;
  79 volatile int32_t        g_done_threads;
  80 boolean_t               g_do_spin = FALSE;
  81 boolean_t               g_verbose = FALSE;
  82 boolean_t               g_do_affinity = FALSE;
  83 uint64_t                g_starttime_abs;
  84 #if MIMIC_DIGI_LEAD_TIME
  85 int                     g_long_spinid;
  86 uint64_t                g_spinlength_abs;
  87 #endif /* MIMIC_DIGI_LEAD_TIME */
  88
  89 /* Global variables (broadcast) */
  90 semaphore_t             g_machsem;
  91 semaphore_t             g_leadersem;
  92
  93 /* Global variables (chain) */
  94 semaphore_t             *g_semarr;
  95
  96 uint64_t
  97 abs_to_nanos(uint64_t abstime)
  98 {
  99         return (uint64_t)(abstime * (((double)g_mti.numer) / ((double)g_mti.denom)));
 100 }
 101
 102 uint64_t
 103 nanos_to_abs(uint64_t ns)
 104 {
 105         return (uint64_t)(ns * (((double)g_mti.denom) / ((double)g_mti.numer)));
 106 }
 107
 108 /*
 109  * Figure out what thread policy to use
 110  */
 111 my_policy_type_t
 112 parse_thread_policy(const char *str)
 113 {
 114         if (strcmp(str, "timeshare") == 0) {
 115                 return MY_POLICY_TIMESHARE;
 116         } else if (strcmp(str, "realtime") == 0) {
 117                 return MY_POLICY_REALTIME;
 118         } else if (strcmp(str, "fixed") == 0) {
 119                 return MY_POLICY_FIXEDPRI;
 120         } else {
 121                 printf("Invalid thread policy %s\n", str);
 122                 exit(1);
 123         }
 124 }
 125
 126 /*
 127  * Figure out what wakeup pattern to use
 128  */
 129 wake_type_t
 130 parse_wakeup_pattern(const char *str)
 131 {
 132         if (strcmp(str, "chain") == 0) {
 133                 return WAKE_CHAIN;
 134         } else if (strcmp(str, "broadcast-single-sem") == 0) {
 135                 return WAKE_BROADCAST_ONESEM;
 136         } else if (strcmp(str, "broadcast-per-thread") == 0) {
 137                 return WAKE_BROADCAST_PERTHREAD;
 138         } else {
 139                 print_usage();
 140                 exit(1);
 141         }
 142 }
 143
 144 /*
 145  * Set policy
 146  */
 147 int
 148 thread_setup(int my_id)
 149 {
 150         int res;
 151
 152         switch (g_policy) {
 153                 case MY_POLICY_TIMESHARE:
 154                 {
 155                         return 0;
 156                 }
 157                 case MY_POLICY_REALTIME:
 158                 {
 159                         thread_time_constraint_policy_data_t pol;
 160
 161                         /* Hard-coded realtime parameters (similar to what Digi uses) */
 162                         pol.period = 100000;
 163                         pol.constraint =  nanos_to_abs(CONSTRAINT_NANOS);
 164                         pol.computation = nanos_to_abs(COMPUTATION_NANOS);
 165                         pol.preemptible = 0; /* Ignored by OS */
 166
 167                         res = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
 168                         assert(res == 0, fail);
 169                         break;
 170                 }
 171                 case MY_POLICY_FIXEDPRI:
 172                 {
 173                         thread_extended_policy_data_t pol;
 174                         pol.timeshare = 0;
 175
 176                         res = thread_policy_set(mach_thread_self(), THREAD_EXTENDED_POLICY, (thread_policy_t) &pol, THREAD_EXTENDED_POLICY_COUNT);
 177                         assert(res == 0, fail);
 178                         break;
 179                 }
 180                 default:
 181                 {
 182                         printf("invalid policy type\n");
 183                         return 1;
 184                 }
 185         }
 186
 187         if (g_do_affinity) {
 188                 thread_affinity_policy_data_t affinity;
 189
 190                 affinity.affinity_tag = my_id % 2;
 191
 192                 res = thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY, (thread_policy_t)&affinity, THREAD_AFFINITY_POLICY_COUNT);
 193                 assert(res == 0, fail);
 194         }
 195
 196         return 0;
 197 fail:
 198         return 1;
 199 }
 200
 201 /*
 202  * Wake up main thread if everyone's done
 203  */
 204 int
 205 thread_finish_iteration(int id)
 206 {
 207         int32_t new;
 208         int res = 0;
 209         volatile float x = 0.0;
 210         volatile float y = 0.0;
 211
 212         debug_log("Thread %p finished iteration.\n", pthread_self());
 213
 214 #if MIMIC_DIGI_LEAD_TIME
 215         /*
 216          * One randomly chosen thread determines when everybody gets to stop.
 217          */
 218         if (g_do_spin) {
 219                 if (g_long_spinid == id) {
 220                         uint64_t endspin;
 221
 222                         /* This thread took up fully half of his computation */
 223                         endspin = g_starttime_abs + g_spinlength_abs;
 224                         while (mach_absolute_time() < endspin) {
 225                                 y = y + 1.5 + x;
 226                                 x = sqrt(y);
 227                         }
 228                 }
 229         }
 230 #endif /* MIMIC_DIGI_LEAD_TIME */
 231
 232         new = OSAtomicIncrement32(&g_done_threads);
 233
 234         debug_log("New value is %d\n", new);
 235
 236         /*
 237          * When the last thread finishes, everyone gets to go back to sleep.
 238          */
 239         if (new == g_numthreads) {
 240                 debug_log("Thread %p signalling main thread.\n", pthread_self());
 241                 res = semaphore_signal(g_main_sem);
 242         } else {
 243 #ifndef MIMIC_DIGI_LEAD_TIME
 244                 if (g_do_spin) {
 245                         while (g_done_threads < g_numthreads) {
 246                                 y = y + 1.5 + x;
 247                                 x = sqrt(y);
 248                         }
 249                 }
 250 #endif
 251         }
 252
 253         return res;
 254 }
 255
 256 /*
 257  * Wait for a wakeup, potentially wake up another of the "0-N" threads,
 258  * and notify the main thread when done.
 259  */
 260 void*
 261 child_thread_func(void *arg)
 262 {
 263         int my_id = (int)(uintptr_t)arg;
 264         int res;
 265         int i, j;
 266         int32_t new;
 267
 268         /* Set policy and so forth */
 269         thread_setup(my_id);
 270
 271         /* Tell main thread when everyone has set up */
 272         new = OSAtomicIncrement32(&g_done_threads);
 273         if (new == g_numthreads) {
 274                 semaphore_signal(g_main_sem);
 275         }
 276
 277         /* For each iteration */
 278         for (i = 0; i < g_iterations; i++) {
 279                 /*
 280                  * Leader thread either wakes everyone up or starts the chain going.
 281                  */
 282                 if (my_id == 0) {
 283                         res = semaphore_wait(g_leadersem);
 284                         assert(res == 0, fail);
 285
 286                         g_thread_endtimes_abs[my_id] = mach_absolute_time();
 287
 288 #if MIMIC_DIGI_LEAD_TIME
 289                         g_long_spinid = rand() % g_numthreads;
 290 #endif /* MIMIC_DIGI_LEAD_TIME */
 291
 292                         switch (g_waketype) {
 293                         case WAKE_CHAIN:
 294                                 semaphore_signal(g_semarr[my_id + 1]);
 295                                 break;
 296                         case WAKE_BROADCAST_ONESEM:
 297                                 semaphore_signal_all(g_machsem);
 298                                 break;
 299                         case WAKE_BROADCAST_PERTHREAD:
 300                                 for (j = 1; j < g_numthreads; j++) {
 301                                         semaphore_signal(g_semarr[j]);
 302                                 }
 303                                 break;
 304                         default:
 305                                 printf("Invalid wakeup type?!\n");
 306                                 exit(1);
 307                         }
 308                 } else {
 309                         /*
 310                          * Everyone else waits to be woken up,
 311                          * records when she wake up, and possibly
 312                          * wakes up a friend.
 313                          */
 314                         switch(g_waketype)  {
 315                         case WAKE_BROADCAST_ONESEM:
 316                                 res = semaphore_wait(g_machsem);
 317                                 assert(res == KERN_SUCCESS, fail);
 318
 319                                 g_thread_endtimes_abs[my_id] = mach_absolute_time();
 320
 321                                 break;
 322                                 /*
 323                                  * For the chain wakeup case:
 324                                  * wait, record time, signal next thread if appropriate
 325                                  */
 326                         case WAKE_BROADCAST_PERTHREAD:
 327                                 res = semaphore_wait(g_semarr[my_id]);
 328                                 assert(res == 0, fail);
 329
 330                                 g_thread_endtimes_abs[my_id] = mach_absolute_time();
 331                                 break;
 332
 333                         case WAKE_CHAIN:
 334                                 res = semaphore_wait(g_semarr[my_id]);
 335                                 assert(res == 0, fail);
 336
 337                                 g_thread_endtimes_abs[my_id] = mach_absolute_time();
 338
 339                                 if (my_id < (g_numthreads - 1)) {
 340                                         res = semaphore_signal(g_semarr[my_id + 1]);
 341                                         assert(res == 0, fail);
 342                                 }
 343
 344                                 break;
 345                         default:
 346                                 printf("Invalid wake type.\n");
 347                                 goto fail;
 348                         }
 349                 }
 350
 351                 res = thread_finish_iteration(my_id);
 352                 assert(res == 0, fail);
 353         }
 354
 355         return 0;
 356 fail:
 357         exit(1);
 358 }
 359
 360 /*
 361  * Admittedly not very attractive.
 362  */
 363 void
 364 print_usage()
 365 {
 366         printf("Usage: zn <num threads> <chain | broadcast-single-sem | broadcast-per-thread> <realtime | timeshare | fixed> <num iterations> [-trace  <traceworthy latency in ns>] [-spin] [-affinity] [-verbose]\n");
 367 }
 368
 369 /*
 370  * Given an array of uint64_t values, compute average, max, min, and standard deviation
 371  */
 372 void
 373 compute_stats(uint64_t *values, uint64_t count, float *averagep, uint64_t *maxp, uint64_t *minp, float *stddevp)
 374 {
 375         int i;
 376         uint64_t _sum = 0;
 377         uint64_t _max = 0;
 378         uint64_t _min = UINT64_MAX;
 379         float    _avg = 0;
 380         float    _dev = 0;
 381
 382         for (i = 0; i < count; i++) {
 383                 _sum += values[i];
 384                 _max = values[i] > _max ? values[i] : _max;
 385                 _min = values[i] < _min ? values[i] : _min;
 386         }
 387
 388         _avg = ((float)_sum) / ((float)count);
 389
 390         _dev = 0;
 391         for (i = 0; i < count; i++) {
 392                 _dev += powf((((float)values[i]) - _avg), 2);
 393         }
 394
 395         _dev /= count;
 396         _dev = sqrtf(_dev);
 397
 398         *averagep = _avg;
 399         *maxp = _max;
 400         *minp = _min;
 401         *stddevp = _dev;
 402 }
 403
 404 int
 405 main(int argc, char **argv)
 406 {
 407         int             i;
 408         int             res;
 409         pthread_t       *threads;
 410         uint64_t        *worst_latencies_ns;
 411         uint64_t        *worst_latencies_from_first_ns;
 412         uint64_t        last_end;
 413         uint64_t        max, min;
 414         uint64_t        traceworthy_latency_ns = TRACEWORTHY_NANOS;
 415         float           avg, stddev;
 416
 417         srand(time(NULL));
 418
 419         if (argc < 5 || argc > 9) {
 420                 print_usage();
 421                 goto fail;
 422         }
 423
 424         /* How many threads? */
 425         g_numthreads = atoi(argv[1]);
 426
 427         /* What wakeup pattern? */
 428         g_waketype = parse_wakeup_pattern(argv[2]);
 429
 430         /* Policy */
 431         g_policy = parse_thread_policy(argv[3]);
 432
 433         /* Iterations */
 434         g_iterations = atoi(argv[4]);
 435
 436         /* Optional args */
 437         for (i = 5; i < argc; i++) {
 438                 if (strcmp(argv[i], "-spin") == 0) {
 439                         g_do_spin = TRUE;
 440                 } else if (strcmp(argv[i], "-verbose") == 0) {
 441                         g_verbose = TRUE;
 442                 } else if ((strcmp(argv[i], "-trace") == 0) &&
 443                                 (i < (argc - 1))) {
 444                         traceworthy_latency_ns = strtoull(argv[++i], NULL, 10);
 445                 } else if (strcmp(argv[i], "-affinity") == 0) {
 446                         g_do_affinity = TRUE;
 447                 } else {
 448                         print_usage();
 449                         goto fail;
 450                 }
 451         }
 452
 453         mach_timebase_info(&g_mti);
 454
 455 #if MIMIC_DIGI_LEAD_TIME
 456         g_spinlength_abs = nanos_to_abs(COMPUTATION_NANOS) / 2;
 457 #endif /* MIMIC_DIGI_LEAD_TIME */
 458
 459         /* Arrays for threads and their wakeup times */
 460         threads = (pthread_t*) malloc(sizeof(pthread_t) * g_numthreads);
 461         assert(threads, fail);
 462
 463         g_thread_endtimes_abs = (uint64_t*) malloc(sizeof(uint64_t) * g_numthreads);
 464         assert(g_thread_endtimes_abs, fail);
 465
 466         worst_latencies_ns = (uint64_t*) malloc(sizeof(uint64_t) * g_iterations);
 467         assert(worst_latencies_ns, fail);
 468
 469         worst_latencies_from_first_ns = (uint64_t*) malloc(sizeof(uint64_t) * g_iterations);
 470         assert(worst_latencies_from_first_ns, fail);
 471         res = semaphore_create(mach_task_self(), &g_main_sem, SYNC_POLICY_FIFO, 0);
 472         assert(res == KERN_SUCCESS, fail);
 473
 474         /* Either one big semaphore or one per thread */
 475         if (g_waketype == WAKE_CHAIN || g_waketype == WAKE_BROADCAST_PERTHREAD) {
 476                 g_semarr = malloc(sizeof(semaphore_t) * g_numthreads);
 477                 assert(g_semarr != NULL, fail);
 478
 479                 for (i = 0; i < g_numthreads; i++) {
 480                         res = semaphore_create(mach_task_self(), &g_semarr[i], SYNC_POLICY_FIFO, 0);
 481                         assert(res == KERN_SUCCESS, fail);
 482                 }
 483
 484                 g_leadersem = g_semarr[0];
 485         } else {
 486                 res = semaphore_create(mach_task_self(), &g_machsem, SYNC_POLICY_FIFO, 0);
 487                 assert(res == KERN_SUCCESS, fail);
 488                 res = semaphore_create(mach_task_self(), &g_leadersem, SYNC_POLICY_FIFO, 0);
 489                 assert(res == KERN_SUCCESS, fail);
 490         }
 491
 492         /* Create the threads */
 493         g_done_threads = 0;
 494         for (i = 0; i < g_numthreads; i++) {
 495                 res = pthread_create(&threads[i], NULL, child_thread_func, (void*)(uintptr_t)i);
 496                 assert(res == 0, fail);
 497         }
 498
 499         /* Let everyone get settled */
 500         semaphore_wait(g_main_sem);
 501         sleep(1);
 502
 503         /* Go! */
 504         for (i = 0; i < g_iterations; i++) {
 505                 int j;
 506                 uint64_t worst_abs = 0, best_abs = UINT64_MAX;
 507
 508                 g_done_threads = 0;
 509                 OSMemoryBarrier();
 510
 511                 g_starttime_abs = mach_absolute_time();
 512
 513                 /* Fire them off */
 514                 semaphore_signal(g_leadersem);
 515
 516                 /* Wait for worker threads to finish */
 517                 semaphore_wait(g_main_sem);
 518                 assert(res == KERN_SUCCESS, fail);
 519
 520                 /*
 521                  * We report the worst latencies relative to start time
 522                  * and relative to the lead worker thread.
 523                  */
 524                 for (j = 0; j < g_numthreads; j++) {
 525                         uint64_t latency_abs;
 526
 527                         latency_abs = g_thread_endtimes_abs[j] - g_starttime_abs;
 528                         worst_abs = worst_abs < latency_abs ? latency_abs : worst_abs;
 529                 }
 530
 531                 worst_latencies_ns[i] = abs_to_nanos(worst_abs);
 532
 533                 worst_abs = 0;
 534                 for (j = 1; j < g_numthreads; j++) {
 535                         uint64_t latency_abs;
 536
 537                         latency_abs = g_thread_endtimes_abs[j] - g_thread_endtimes_abs[0];
 538                         worst_abs = worst_abs < latency_abs ? latency_abs : worst_abs;
 539                         best_abs = best_abs > latency_abs ? latency_abs : best_abs;
 540                 }
 541
 542                 worst_latencies_from_first_ns[i] = abs_to_nanos(worst_abs);
 543
 544                 /*
 545                  * In the event of a bad run, cut a trace point.
 546                  */
 547                 if (worst_latencies_from_first_ns[i] > traceworthy_latency_ns) {
 548                         int _tmp;
 549
 550                         if (g_verbose) {
 551                                 printf("Worst on this round was %.2f us.\n", ((float)worst_latencies_from_first_ns[i]) / 1000.0);
 552                         }
 553
 554                         _tmp = syscall(SYS_kdebug_trace, 0xEEEEEEEE, 0, 0, 0, 0);
 555                 }
 556
 557                 /* Let worker threads get back to sleep... */
 558                 usleep(g_numthreads * 10);
 559         }
 560
 561         /* Rejoin threads */
 562         last_end = 0;
 563         for (i = 0; i < g_numthreads; i++) {
 564                 res = pthread_join(threads[i], NULL);
 565                 assert(res == 0, fail);
 566         }
 567
 568         compute_stats(worst_latencies_ns, g_iterations, &avg, &max, &min, &stddev);
 569         printf("Results (from a stop):\n");
 570         printf("Max:\t\t%.2f us\n", ((float)max) / 1000.0);
 571         printf("Min:\t\t%.2f us\n", ((float)min) / 1000.0);
 572         printf("Avg:\t\t%.2f us\n", avg / 1000.0);
 573         printf("Stddev:\t\t%.2f us\n", stddev / 1000.0);
 574
 575         putchar('\n');
 576
 577         compute_stats(worst_latencies_from_first_ns, g_iterations, &avg, &max, &min, &stddev);
 578         printf("Results (relative to first thread):\n");
 579         printf("Max:\t\t%.2f us\n", ((float)max) / 1000.0);
 580         printf("Min:\t\t%.2f us\n", ((float)min) / 1000.0);
 581         printf("Avg:\t\t%.2f us\n", avg / 1000.0);
 582         printf("Stddev:\t\t%.2f us\n", stddev / 1000.0);
 583
 584 #if 0
 585         for (i = 0; i < g_iterations; i++) {
 586                 printf("Iteration %d: %f us\n", i, worst_latencies_ns[i] / 1000.0);
 587         }
 588 #endif
 589
 590         return 0;
 591 fail:
 592         return 1;
 593 }