tools/tests/zero-to-n/zero-to-n.c

   1 /*
   2  * Copyright (c) 2009 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 #include <unistd.h>
  29 #include <stdio.h>
  30 #include <math.h>
  31 #include <sys/wait.h>
  32 #include <sys/syscall.h>
  33 #include <sys/types.h>
  34 #include <sys/ptrace.h>
  35 #include <semaphore.h>
  36 #include <stdlib.h>
  37 #include <pthread.h>
  38 #include <fcntl.h>
  39 #include <errno.h>
  40 #include <string.h>
  41
  42 #include <libkern/OSAtomic.h>
  43
  44 #include <mach/mach_time.h>
  45 #include <mach/mach.h>
  46 #include <mach/task.h>
  47 #include <mach/semaphore.h>
  48
  49 typedef enum wake_type { WAKE_BROADCAST_ONESEM, WAKE_BROADCAST_PERTHREAD, WAKE_CHAIN } wake_type_t;
  50 typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY_FIXEDPRI } my_policy_type_t;
  51
  52 #define assert(truth, label) do { if(!(truth)) { printf("Thread %p: failure on line %d\n", pthread_self(), __LINE__); goto label; } } while (0)
  53
  54 #define CONSTRAINT_NANOS        (20000000ll)    /* 20 ms */
  55 #define COMPUTATION_NANOS       (10000000ll)    /* 10 ms */
  56 #define TRACEWORTHY_NANOS       (10000000ll)    /* 10 ms */
  57
  58 #if DEBUG
  59 #define debug_log(args...) printf(args)
  60 #else
  61 #define debug_log(args...) do { } while(0)
  62 #endif
  63
  64 /* Declarations */
  65 void*                   child_thread_func(void *arg);
  66 void                    print_usage();
  67 int                     thread_setup();
  68 my_policy_type_t        parse_thread_policy(const char *str);
  69 int                     thread_finish_iteration();
  70
  71 /* Global variables (general) */
  72 int                     g_numthreads;
  73 wake_type_t             g_waketype;
  74 policy_t                g_policy;
  75 int                     g_iterations;
  76 struct mach_timebase_info g_mti;
  77 semaphore_t             g_main_sem;
  78 uint64_t                *g_thread_endtimes_abs;
  79 volatile int32_t        g_done_threads;
  80 boolean_t               g_do_spin = FALSE;
  81 boolean_t               g_verbose = FALSE;
  82 uint64_t                g_starttime_abs;
  83 #if MIMIC_DIGI_LEAD_TIME
  84 int                     g_long_spinid;
  85 uint64_t                g_spinlength_abs;
  86 #endif /* MIMIC_DIGI_LEAD_TIME */
  87
  88 /* Global variables (broadcast) */
  89 semaphore_t             g_machsem;
  90 semaphore_t             g_leadersem;
  91
  92 /* Global variables (chain) */
  93 semaphore_t             *g_semarr;
  94
  95 uint64_t
  96 abs_to_nanos(uint64_t abstime)
  97 {
  98         return (uint64_t)(abstime * (((double)g_mti.numer) / ((double)g_mti.denom)));
  99 }
 100
 101 uint64_t
 102 nanos_to_abs(uint64_t ns)
 103 {
 104         return (uint64_t)(ns * (((double)g_mti.denom) / ((double)g_mti.numer)));
 105 }
 106
 107 /*
 108  * Figure out what thread policy to use
 109  */
 110 my_policy_type_t
 111 parse_thread_policy(const char *str)
 112 {
 113         if (strcmp(str, "timeshare") == 0) {
 114                 return MY_POLICY_TIMESHARE;
 115         } else if (strcmp(str, "realtime") == 0) {
 116                 return MY_POLICY_REALTIME;
 117         } else if (strcmp(str, "fixed") == 0) {
 118                 return MY_POLICY_FIXEDPRI;
 119         } else {
 120                 printf("Invalid thread policy %s\n", str);
 121                 exit(1);
 122         }
 123 }
 124
 125 /*
 126  * Figure out what wakeup pattern to use
 127  */
 128 wake_type_t
 129 parse_wakeup_pattern(const char *str)
 130 {
 131         if (strcmp(str, "chain") == 0) {
 132                 return WAKE_CHAIN;
 133         } else if (strcmp(str, "broadcast-single-sem") == 0) {
 134                 return WAKE_BROADCAST_ONESEM;
 135         } else if (strcmp(str, "broadcast-per-thread") == 0) {
 136                 return WAKE_BROADCAST_PERTHREAD;
 137         } else {
 138                 print_usage();
 139                 exit(1);
 140         }
 141 }
 142
 143 /*
 144  * Set policy
 145  */
 146 int
 147 thread_setup()
 148 {
 149         int res;
 150
 151         switch (g_policy) {
 152                 case MY_POLICY_TIMESHARE:
 153                 {
 154                         return 0;
 155                 }
 156                 case MY_POLICY_REALTIME:
 157                 {
 158                         thread_time_constraint_policy_data_t pol;
 159
 160                         /* Hard-coded realtime parameters (similar to what Digi uses) */
 161                         pol.period = 100000;
 162                         pol.constraint =  nanos_to_abs(CONSTRAINT_NANOS);
 163                         pol.computation = nanos_to_abs(COMPUTATION_NANOS);
 164                         pol.preemptible = 0; /* Ignored by OS */
 165
 166                         res = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
 167                         assert(res == 0, fail);
 168                         break;
 169                 }
 170                 case MY_POLICY_FIXEDPRI:
 171                 {
 172                         thread_extended_policy_data_t pol;
 173                         pol.timeshare = 0;
 174
 175                         res = thread_policy_set(mach_thread_self(), THREAD_EXTENDED_POLICY, (thread_policy_t) &pol, THREAD_EXTENDED_POLICY_COUNT);
 176                         assert(res == 0, fail);
 177                         break;
 178                 }
 179                 default:
 180                 {
 181                         printf("invalid policy type\n");
 182                         return 1;
 183                 }
 184         }
 185
 186         return 0;
 187 fail:
 188         return 1;
 189 }
 190
 191 /*
 192  * Wake up main thread if everyone's done
 193  */
 194 int
 195 thread_finish_iteration(int id)
 196 {
 197         int32_t new;
 198         int res = 0;
 199         volatile float x = 0.0;
 200         volatile float y = 0.0;
 201
 202         debug_log("Thread %p finished iteration.\n", pthread_self());
 203
 204 #if MIMIC_DIGI_LEAD_TIME
 205         /*
 206          * One randomly chosen thread determines when everybody gets to stop.
 207          */
 208         if (g_do_spin) {
 209                 if (g_long_spinid == id) {
 210                         uint64_t endspin;
 211
 212                         /* This thread took up fully half of his computation */
 213                         endspin = g_starttime_abs + g_spinlength_abs;
 214                         while (mach_absolute_time() < endspin) {
 215                                 y = y + 1.5 + x;
 216                                 x = sqrt(y);
 217                         }
 218                 }
 219         }
 220 #endif /* MIMIC_DIGI_LEAD_TIME */
 221
 222         new = OSAtomicIncrement32(&g_done_threads);
 223
 224         debug_log("New value is %d\n", new);
 225
 226         /*
 227          * When the last thread finishes, everyone gets to go back to sleep.
 228          */
 229         if (new == g_numthreads) {
 230                 debug_log("Thread %p signalling main thread.\n", pthread_self());
 231                 res = semaphore_signal(g_main_sem);
 232         } else {
 233                 if (g_do_spin) {
 234                         while (g_done_threads < g_numthreads) {
 235                                 y = y + 1.5 + x;
 236                                 x = sqrt(y);
 237                         }
 238                 }
 239         }
 240
 241         return res;
 242 }
 243
 244 /*
 245  * Wait for a wakeup, potentially wake up another of the "0-N" threads,
 246  * and notify the main thread when done.
 247  */
 248 void*
 249 child_thread_func(void *arg)
 250 {
 251         int my_id = (int)(uintptr_t)arg;
 252         int res;
 253         int i, j;
 254         int32_t new;
 255
 256         /* Set policy and so forth */
 257         thread_setup();
 258
 259         /* Tell main thread when everyone has set up */
 260         new = OSAtomicIncrement32(&g_done_threads);
 261         if (new == g_numthreads) {
 262                 semaphore_signal(g_main_sem);
 263         }
 264
 265         /* For each iteration */
 266         for (i = 0; i < g_iterations; i++) {
 267                 /*
 268                  * Leader thread either wakes everyone up or starts the chain going.
 269                  */
 270                 if (my_id == 0) {
 271                         res = semaphore_wait(g_leadersem);
 272                         assert(res == 0, fail);
 273
 274                         g_thread_endtimes_abs[my_id] = mach_absolute_time();
 275
 276 #if MIMIC_DIGI_LEAD_TIME
 277                         g_long_spinid = rand() % g_numthreads;
 278 #endif /* MIMIC_DIGI_LEAD_TIME */
 279
 280                         switch (g_waketype) {
 281                         case WAKE_CHAIN:
 282                                 semaphore_signal(g_semarr[my_id + 1]);
 283                                 break;
 284                         case WAKE_BROADCAST_ONESEM:
 285                                 semaphore_signal_all(g_machsem);
 286                                 break;
 287                         case WAKE_BROADCAST_PERTHREAD:
 288                                 for (j = 1; j < g_numthreads; j++) {
 289                                         semaphore_signal(g_semarr[j]);
 290                                 }
 291                                 break;
 292                         default:
 293                                 printf("Invalid wakeup type?!\n");
 294                                 exit(1);
 295                         }
 296                 } else {
 297                         /*
 298                          * Everyone else waits to be woken up,
 299                          * records when she wake up, and possibly
 300                          * wakes up a friend.
 301                          */
 302                         switch(g_waketype)  {
 303                         case WAKE_BROADCAST_ONESEM:
 304                                 res = semaphore_wait(g_machsem);
 305                                 assert(res == KERN_SUCCESS, fail);
 306
 307                                 g_thread_endtimes_abs[my_id] = mach_absolute_time();
 308
 309                                 break;
 310                                 /*
 311                                  * For the chain wakeup case:
 312                                  * wait, record time, signal next thread if appropriate
 313                                  */
 314                         case WAKE_BROADCAST_PERTHREAD:
 315                                 res = semaphore_wait(g_semarr[my_id]);
 316                                 assert(res == 0, fail);
 317
 318                                 g_thread_endtimes_abs[my_id] = mach_absolute_time();
 319                                 break;
 320
 321                         case WAKE_CHAIN:
 322                                 res = semaphore_wait(g_semarr[my_id]);
 323                                 assert(res == 0, fail);
 324
 325                                 g_thread_endtimes_abs[my_id] = mach_absolute_time();
 326
 327                                 if (my_id < (g_numthreads - 1)) {
 328                                         res = semaphore_signal(g_semarr[my_id + 1]);
 329                                         assert(res == 0, fail);
 330                                 }
 331
 332                                 break;
 333                         default:
 334                                 printf("Invalid wake type.\n");
 335                                 goto fail;
 336                         }
 337                 }
 338
 339                 res = thread_finish_iteration(my_id);
 340                 assert(res == 0, fail);
 341         }
 342
 343         return 0;
 344 fail:
 345         exit(1);
 346 }
 347
 348 /*
 349  * Admittedly not very attractive.
 350  */
 351 void
 352 print_usage()
 353 {
 354         printf("Usage: zn <num threads> <chain | broadcast-single-sem | broadcast-per-thread> <realtime | timeshare | fixed> <num iterations> [-trace  <traceworthy latency in ns>] [-spin] [-verbose]\n");
 355 }
 356
 357 /*
 358  * Given an array of uint64_t values, compute average, max, min, and standard deviation
 359  */
 360 void
 361 compute_stats(uint64_t *values, uint64_t count, float *averagep, uint64_t *maxp, uint64_t *minp, float *stddevp)
 362 {
 363         int i;
 364         uint64_t _sum = 0;
 365         uint64_t _max = 0;
 366         uint64_t _min = UINT64_MAX;
 367         float    _avg = 0;
 368         float    _dev = 0;
 369
 370         for (i = 0; i < count; i++) {
 371                 _sum += values[i];
 372                 _max = values[i] > _max ? values[i] : _max;
 373                 _min = values[i] < _min ? values[i] : _min;
 374         }
 375
 376         _avg = ((float)_sum) / ((float)count);
 377
 378         _dev = 0;
 379         for (i = 0; i < count; i++) {
 380                 _dev += powf((((float)values[i]) - _avg), 2);
 381         }
 382
 383         _dev /= count;
 384         _dev = sqrtf(_dev);
 385
 386         *averagep = _avg;
 387         *maxp = _max;
 388         *minp = _min;
 389         *stddevp = _dev;
 390 }
 391
 392 int
 393 main(int argc, char **argv)
 394 {
 395         int             i;
 396         int             res;
 397         pthread_t       *threads;
 398         uint64_t        *worst_latencies_ns;
 399         uint64_t        *worst_latencies_from_first_ns;
 400         uint64_t        last_end;
 401         uint64_t        max, min;
 402         uint64_t        traceworthy_latency_ns = TRACEWORTHY_NANOS;
 403         float           avg, stddev;
 404
 405         srand(time(NULL));
 406
 407         if (argc < 5 || argc > 9) {
 408                 print_usage();
 409                 goto fail;
 410         }
 411
 412         /* How many threads? */
 413         g_numthreads = atoi(argv[1]);
 414
 415         /* What wakeup pattern? */
 416         g_waketype = parse_wakeup_pattern(argv[2]);
 417
 418         /* Policy */
 419         g_policy = parse_thread_policy(argv[3]);
 420
 421         /* Iterations */
 422         g_iterations = atoi(argv[4]);
 423
 424         /* Optional args */
 425         for (i = 5; i < argc; i++) {
 426                 if (strcmp(argv[i], "-spin") == 0) {
 427                         g_do_spin = TRUE;
 428                 } else if (strcmp(argv[i], "-verbose") == 0) {
 429                         g_verbose = TRUE;
 430                 } else if ((strcmp(argv[i], "-trace") == 0) &&
 431                                 (i < (argc - 1))) {
 432                         traceworthy_latency_ns = strtoull(argv[++i], NULL, 10);
 433                 } else {
 434                         print_usage();
 435                         goto fail;
 436                 }
 437         }
 438
 439         mach_timebase_info(&g_mti);
 440
 441 #if MIMIC_DIGI_LEAD_TIME
 442         g_spinlength_abs = nanos_to_abs(COMPUTATION_NANOS) / 2;
 443 #endif /* MIMIC_DIGI_LEAD_TIME */
 444
 445         /* Arrays for threads and their wakeup times */
 446         threads = (pthread_t*) malloc(sizeof(pthread_t) * g_numthreads);
 447         assert(threads, fail);
 448
 449         g_thread_endtimes_abs = (uint64_t*) malloc(sizeof(uint64_t) * g_numthreads);
 450         assert(g_thread_endtimes_abs, fail);
 451
 452         worst_latencies_ns = (uint64_t*) malloc(sizeof(uint64_t) * g_iterations);
 453         assert(worst_latencies_ns, fail);
 454
 455         worst_latencies_from_first_ns = (uint64_t*) malloc(sizeof(uint64_t) * g_iterations);
 456         assert(worst_latencies_from_first_ns, fail);
 457         res = semaphore_create(mach_task_self(), &g_main_sem, SYNC_POLICY_FIFO, 0);
 458         assert(res == KERN_SUCCESS, fail);
 459
 460         /* Either one big semaphore or one per thread */
 461         if (g_waketype == WAKE_CHAIN || g_waketype == WAKE_BROADCAST_PERTHREAD) {
 462                 g_semarr = malloc(sizeof(semaphore_t) * g_numthreads);
 463                 assert(g_semarr != NULL, fail);
 464
 465                 for (i = 0; i < g_numthreads; i++) {
 466                         res = semaphore_create(mach_task_self(), &g_semarr[i], SYNC_POLICY_FIFO, 0);
 467                         assert(res == KERN_SUCCESS, fail);
 468                 }
 469
 470                 g_leadersem = g_semarr[0];
 471         } else {
 472                 res = semaphore_create(mach_task_self(), &g_machsem, SYNC_POLICY_FIFO, 0);
 473                 assert(res == KERN_SUCCESS, fail);
 474                 res = semaphore_create(mach_task_self(), &g_leadersem, SYNC_POLICY_FIFO, 0);
 475                 assert(res == KERN_SUCCESS, fail);
 476         }
 477
 478         /* Create the threads */
 479         g_done_threads = 0;
 480         for (i = 0; i < g_numthreads; i++) {
 481                 res = pthread_create(&threads[i], NULL, child_thread_func, (void*)(uintptr_t)i);
 482                 assert(res == 0, fail);
 483         }
 484
 485         /* Let everyone get settled */
 486         semaphore_wait(g_main_sem);
 487         sleep(1);
 488
 489         /* Go! */
 490         for (i = 0; i < g_iterations; i++) {
 491                 int j;
 492                 uint64_t worst_abs = 0, best_abs = UINT64_MAX;
 493
 494                 g_done_threads = 0;
 495                 OSMemoryBarrier();
 496
 497                 g_starttime_abs = mach_absolute_time();
 498
 499                 /* Fire them off */
 500                 semaphore_signal(g_leadersem);
 501
 502                 /* Wait for worker threads to finish */
 503                 semaphore_wait(g_main_sem);
 504                 assert(res == KERN_SUCCESS, fail);
 505
 506                 /*
 507                  * We report the worst latencies relative to start time
 508                  * and relative to the lead worker thread.
 509                  */
 510                 for (j = 0; j < g_numthreads; j++) {
 511                         uint64_t latency_abs;
 512
 513                         latency_abs = g_thread_endtimes_abs[j] - g_starttime_abs;
 514                         worst_abs = worst_abs < latency_abs ? latency_abs : worst_abs;
 515                 }
 516
 517                 worst_latencies_ns[i] = abs_to_nanos(worst_abs);
 518
 519                 worst_abs = 0;
 520                 for (j = 1; j < g_numthreads; j++) {
 521                         uint64_t latency_abs;
 522
 523                         latency_abs = g_thread_endtimes_abs[j] - g_thread_endtimes_abs[0];
 524                         worst_abs = worst_abs < latency_abs ? latency_abs : worst_abs;
 525                         best_abs = best_abs > latency_abs ? latency_abs : best_abs;
 526                 }
 527
 528                 worst_latencies_from_first_ns[i] = abs_to_nanos(worst_abs);
 529
 530                 /*
 531                  * In the event of a bad run, cut a trace point.
 532                  */
 533                 if (worst_latencies_from_first_ns[i] > traceworthy_latency_ns) {
 534                         int _tmp;
 535
 536                         if (g_verbose) {
 537                                 printf("Worst on this round was %.2f us.\n", ((float)worst_latencies_from_first_ns[i]) / 1000.0);
 538                         }
 539
 540                         _tmp = syscall(SYS_kdebug_trace, 0xEEEEEEEE, 0, 0, 0, 0);
 541                 }
 542
 543                 /* Let worker threads get back to sleep... */
 544                 usleep(g_numthreads * 10);
 545         }
 546
 547         /* Rejoin threads */
 548         last_end = 0;
 549         for (i = 0; i < g_numthreads; i++) {
 550                 res = pthread_join(threads[i], NULL);
 551                 assert(res == 0, fail);
 552         }
 553
 554         compute_stats(worst_latencies_ns, g_iterations, &avg, &max, &min, &stddev);
 555         printf("Results (from a stop):\n");
 556         printf("Max:\t\t%.2f us\n", ((float)max) / 1000.0);
 557         printf("Min:\t\t%.2f us\n", ((float)min) / 1000.0);
 558         printf("Avg:\t\t%.2f us\n", avg / 1000.0);
 559         printf("Stddev:\t\t%.2f us\n", stddev / 1000.0);
 560
 561         putchar('\n');
 562
 563         compute_stats(worst_latencies_from_first_ns, g_iterations, &avg, &max, &min, &stddev);
 564         printf("Results (relative to first thread):\n");
 565         printf("Max:\t\t%.2f us\n", ((float)max) / 1000.0);
 566         printf("Min:\t\t%.2f us\n", ((float)min) / 1000.0);
 567         printf("Avg:\t\t%.2f us\n", avg / 1000.0);
 568         printf("Stddev:\t\t%.2f us\n", stddev / 1000.0);
 569
 570 #if 0
 571         for (i = 0; i < g_iterations; i++) {
 572                 printf("Iteration %d: %f us\n", i, worst_latencies_ns[i] / 1000.0);
 573         }
 574 #endif
 575
 576         return 0;
 577 fail:
 578         return 1;
 579 }