/*
 * Imported from apple/xnu.git: tools/tests/libMicro/apple/lmbench_lat_ctx.c
 * (blob 2cbe790c275c86be46eb08fd047e5f69c0acdbd3)
 */
1 /*
2 * Copyright (c) 2006 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29
30 /*
31 * Order of Execution
32 *
33 * benchmark_init
34 *
35 * benchmark_optswitch
36 *
37 * benchmark_initrun
38 *
39 * benchmark_initworker
40 * benchmark_initbatch
41 * benchmark
42 * benchmark_finibatch
43 * benchmark_initbatch
44 * benchmark
45 * benchmark_finibatch, etc.
46 * benchmark_finiworker
47 *
48 * benchmark_result
49 *
50 * benchmark_finirun
51 *
52 * benchmark_fini
53 */
54
55
56
57 #ifdef __sun
58 #pragma ident "@(#)trivial.c 1.0 08/17/06 Apple Inc."
59 #endif
60
61
62
63 #include <unistd.h>
64 #include <stdlib.h>
65 #include <stdio.h>
66
67 #include <signal.h>
68 #include <strings.h>
69
70 #include <sys/sysctl.h>
71 #include "../libmicro.h"
72
/*
 * debug() prints a message (plus newline) to stderr.  It is compiled in
 * unconditionally here (#if 1); flip to the #else arm to silence it.
 * NOTE(review): consider gating on a DEBUG build flag instead -- confirm
 * whether the always-on output is intentional for this benchmark.
 */
#if 1
# define debug(fmt, args...) (void) fprintf(stderr, fmt "\n" , ##args)
#else
# define debug(fmt, args...)
#endif


#define MAXPROC 2048	/* NOTE(review): unused in the visible portion of this file */
#define CHUNK (4<<10)	/* NOTE(review): unused in the visible portion of this file */
#define TRIPS 5		/* NOTE(review): unused in the visible portion of this file */
#ifndef max
/* Classic max macro; beware: evaluates each argument twice. */
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
86
87
/*
 * Your state variables should live in the tsd_t struct below.
 * One instance exists per benchmark worker; the framework allocates it
 * based on lm_tsdsize (set in benchmark_init).
 */
typedef struct {
	int process_size;	/* bytes each ring process sums over (-s option; default 1) */
	double overhead;	/* NOTE(review): never read or written in this file -- possibly vestigial */
	int procs;		/* ring size: number of pipes/processes; set outside the visible code -- TODO confirm */
	pid_t* pids;		/* child pids; slots [1..procs-1] filled by create_daemons(), [0] stays zero */
	int **p;		/* procs pipe pairs: p[i][0] = read end, p[i][1] = write end */
	void* data;		/* scratch buffer of process_size bytes (NULL when process_size == 0) */
} tsd_t;
99
/*
 * Process size selected by -s (parsed with sizetoint()); defaults to 1.
 * NOTE(review): usage text says "kbytes" but the value is used directly as
 * a byte count for malloc/bread -- confirm the intended units.
 */
static int opts = 1;

/* Forward declarations for the lmbench-derived helpers defined below. */
void doit(int rd, int wr, int process_size);
int create_pipes(int **p, int procs);
int create_daemons(int **p, pid_t *pids, int procs, int process_size);
void initialize_overhead(void* tsd);
void cleanup_overhead(void* tsd);
void benchmark_overhead(void* tsd);
void initialize(void* tsd);	/* NOTE(review): declared but never defined in this file */
void cleanup(void* tsd);
long bread(void* buf, long nbytes);
111
112
113 #pragma mark *** lmbench routines
114
115 /*
116 * lmbench routines, etc. brought over for this benchmark
117 */
118
/*
 * Raise this process's open-file limit to its hard maximum so the
 * benchmark can create a large number of pipes.  Errors from the
 * rlimit calls are deliberately ignored (best effort).
 */
void
morefds(void)
{
#ifdef RLIMIT_NOFILE
	struct rlimit limits;

	getrlimit(RLIMIT_NOFILE, &limits);
	limits.rlim_cur = limits.rlim_max;
	setrlimit(RLIMIT_NOFILE, &limits);
#endif
}
130
/*
 * Body of one ring daemon: forever read the token from fd 'rd', touch
 * 'process_size' bytes of private data via bread(), and pass the token
 * on through fd 'wr'.  Exits when either end of the pipe breaks.
 * Never returns to the caller.
 */
void
doit(int rd, int wr, int process_size)
{
	int	msg;
	void	*data = NULL;

	if (process_size) {
		data = malloc(process_size);
		if (data)
			bzero(data, process_size);
		else
			process_size = 0;	/* fix: avoid bread(NULL, n) dereferencing NULL on malloc failure */
	}
	for ( ;; ) {
		if (read(rd, &msg, sizeof (msg)) != sizeof (msg)) {
			debug("read/write on pipe");
			break;
		}
		bread(data, process_size);
		if (write(wr, &msg, sizeof (msg)) != sizeof (msg)) {
			debug("read/write on pipe");
			break;
		}
	}
	exit(0);
}
154
/*
 * Return the number of processors in this host.
 * Falls back to 1 if no platform interface is available or the query fails.
 */
int
sched_ncpus()
{
#ifdef MP_NPROCS
	/* SGI IRIX interface */
	return sysmp(MP_NPROCS);
#elif defined(HAVE_MPCTL)
	/* HP-UX interface */
	return mpctl(MPC_GETNUMSPUS_SYS, 0, 0);
#elif defined(_SC_NPROCESSORS_ONLN)
	/* AIX, Solaris, and Linux interface */
	return sysconf(_SC_NPROCESSORS_ONLN);
#elif __APPLE__
	char *name = "hw.activecpu";
	int cpus;
	size_t len = sizeof (cpus);	/* fix: was hard-coded 4; use the true buffer size */

	/* fix: check the sysctl result instead of returning garbage on failure */
	if (sysctlbyname(name, &cpus, &len, NULL, 0) != 0 || cpus < 1)
		return 1;
	return cpus;
#endif
	return 1;
}
181
/*
 * Use to get sequentially created processes "far" away from
 * each other in an SMP: mirror the low-order bits of the CPU id.
 *
 * XXX: probably doesn't work for NCPUS not a power of two.
 */
int
reverse_bits(int cpu)
{
	int	bit;
	int	width;
	int	top = sched_ncpus() - 1;
	int	mirrored = 0;

	/* how many bits are needed to represent the highest CPU id */
	for (bit = top >> 1, width = 1; bit > 0; bit >>= 1, width++)
		;
	/* mirror the low 'width' bits of cpu */
	for (bit = 0; bit < width; bit++) {
		if (cpu & (1 << bit))
			mirrored |= 1 << (width - bit - 1);
	}
	return mirrored;
}
205
206
/*
 * The interface used by benchmp.
 *
 * childno is the "logical" child id number.
 *	In range [0, ..., parallel-1].
 * benchproc is the "logical" id within the benchmark process.  The
 *	benchmp-created process is logical ID zero, child processes
 *	created by the benchmark range from [1, ..., nbenchprocs].
 * nbenchprocs is the number of child processes that each benchmark
 *	process will create.  Most benchmarks will leave this zero,
 *	but some such as the pipe() benchmarks will not.
 *
 * The placement policy is taken from $LMBENCH_SCHED.  Note that actual
 * pinning is disabled in this port: the chosen cpu is computed and
 * logged, then the function returns 0 regardless.  (The original
 * CUSTOM/CUSTOM_UNIQUE policies were already compiled out.)
 */
int
handle_scheduler(int childno, int benchproc, int nbenchprocs)
{
	int	cpu;
	char	*policy = getenv("LMBENCH_SCHED");

	/* no policy, or explicit DEFAULT: let the scheduler decide */
	if (policy == NULL || strcasecmp(policy, "DEFAULT") == 0)
		return 0;

	if (strcasecmp(policy, "SINGLE") == 0) {
		/* pile every process onto CPU 0 */
		cpu = 0;
	} else if (strcasecmp(policy, "BALANCED") == 0) {
		/*
		 * one CPU per benchmark process; its children share
		 * the parent's CPU
		 */
		cpu = childno;
	} else if (strcasecmp(policy, "BALANCED_SPREAD") == 0) {
		/*
		 * one CPU per benchmark process, with neighboring ids
		 * placed as far apart as possible -- useful for spotting
		 * bus contention on hierarchical/NUMA machines
		 */
		cpu = reverse_bits(childno);
	} else if (strcasecmp(policy, "UNIQUE") == 0) {
		/* one CPU per benchmark process AND per child process */
		cpu = childno * (nbenchprocs + 1) + benchproc;
	} else if (strcasecmp(policy, "UNIQUE_SPREAD") == 0) {
		/* as UNIQUE, but spread like BALANCED_SPREAD */
		cpu = reverse_bits(childno * (nbenchprocs + 1) + benchproc);
	} else {
		/* unrecognized policy: do nothing */
		return 0;
	}
	debug("cpu = %d, sched_ncpus() = %d", cpu, sched_ncpus());
	/* pinning disabled: sched_pin(cpu % sched_ncpus()) would go here */
	return 0;
}
278
/*
 * Fork procs-1 daemons arranged around the ring of pipes p[], each
 * running doit() on its segment, then push one token around the ring
 * so every child is known to be alive and the token is in flight.
 * pids[1..procs-1] receive the child pids (pids[0] is untouched).
 * Returns procs on success, or the index of the first failed fork.
 */
int
create_daemons(int **p, pid_t *pids, int procs, int process_size)
{
	int	i, j;
	int	msg = 0;	/* fix: token was previously written to the pipe uninitialized */

	/*
	 * Use the pipes as a ring, and fork off a bunch of processes
	 * to pass the byte through their part of the ring.
	 *
	 * Do the sum in each process and get that time before moving on.
	 *
	 * NOTE(review): getpid() is passed where handle_scheduler()
	 * documents a logical child id -- confirm this is intended.
	 */
	handle_scheduler(getpid(), 0, procs-1);
	for (i = 1; i < procs; ++i) {
		switch (pids[i] = fork()) {
		case -1:	/* could not fork, out of processes? */
			return i;

		case 0:		/* child: keep only its two ring fds, then serve forever */
			handle_scheduler(getpid(), i, procs-1);
			for (j = 0; j < procs; ++j) {
				if (j != i - 1) close(p[j][0]);
				if (j != i) close(p[j][1]);
			}
			doit(p[i-1][0], p[i][1], process_size);
			/* NOTREACHED */

		default:	/* parent */
			;
		}
	}

	/*
	 * Go once around the loop to make sure that everyone is ready and
	 * to get the token in the pipeline.
	 */
	if (write(p[0][1], &msg, sizeof (msg)) != sizeof (msg) ||
	    read(p[procs-1][0], &msg, sizeof (msg)) != sizeof (msg)) {
		debug("write/read/write on pipe");
		exit(1);
	}
	return procs;
}
322
/*
 * Create 'procs' pipes into p[0..procs-1], raising the fd limit first.
 * Returns procs on success, or the index of the first pipe() that failed.
 */
int
create_pipes(int **p, int procs)
{
	int	n;

	/* make sure we have enough file descriptors available */
	morefds();
	for (n = 0; n < procs; n++) {
		if (pipe(p[n]) < 0)
			return n;
	}
	return procs;
}
338
339 void
340 initialize_overhead(void* cookie)
341 {
342 int i;
343 int procs;
344 int* p;
345 tsd_t *pState = (tsd_t *)cookie;
346
347 pState->pids = NULL;
348 pState->p = (int**)malloc(pState->procs * (sizeof(int*) + 2 * sizeof(int)));
349 p = (int*)&pState->p[pState->procs];
350 for (i = 0; i < pState->procs; ++i) {
351 pState->p[i] = p;
352 p += 2;
353 }
354
355 pState->data = (pState->process_size > 0) ? malloc(pState->process_size) : NULL;
356 if (pState->data)
357 bzero(pState->data, pState->process_size);
358
359 procs = create_pipes(pState->p, pState->procs);
360 if (procs < pState->procs) {
361 debug("procs < pState->procs");
362 cleanup_overhead(cookie);
363 exit(1);
364 }
365 }
366
367 void
368 cleanup_overhead(void* tsd)
369 {
370 int i;
371 tsd_t *ts = (tsd_t *)tsd;
372
373 for (i = 0; i < ts->procs; ++i) {
374 close(ts->p[i][0]);
375 close(ts->p[i][1]);
376 }
377
378 free(ts->p);
379 if (ts->data) free(ts->data);
380 }
381
382 void
383 cleanup(void* cookie)
384 {
385 int i;
386 tsd_t *pState = (tsd_t *)cookie;
387
388
389 /*
390 * Close the pipes and kill the children.
391 */
392 cleanup_overhead(cookie);
393 for (i = 1; pState->pids && i < pState->procs; ++i) {
394 if (pState->pids[i] > 0) {
395 kill(pState->pids[i], SIGKILL);
396 waitpid(pState->pids[i], NULL, 0);
397 }
398 }
399 if (pState->pids)
400 free(pState->pids);
401 pState->pids = NULL;
402 }
403
404 void
405 benchmark_overhead(void* tsd)
406 {
407 tsd_t *ts = (tsd_t *)tsd;
408 int i = 0;
409 int msg = 1;
410
411 for (i = 0; i < lm_optB; i++) {
412 if (write(ts->p[i][1], &msg, sizeof(msg)) != sizeof(msg)) {
413 debug("read/write on pipe");
414 exit(1);
415 }
416 if (read(ts->p[i][0], &msg, sizeof(msg)) != sizeof(msg)) {
417 debug("read/write on pipe");
418 exit(1);
419 }
420 if (++i == ts->procs) {
421 i = 0;
422 }
423 bread(ts->data, ts->process_size);
424 }
425 }
426
/* analogous to bzero, bcopy, etc., except that it just reads
 * data into the processor
 *
 * Sums the buffer as an array of longs so the compiler cannot discard
 * the loads.  The loop is hand-unrolled in chunks of 128, then 16,
 * then 1 long(s); the unrolling is deliberate benchmark calibration,
 * so the statement layout must be preserved as-is.  Trailing bytes
 * that do not fill a whole long are ignored.  With nbytes == 0 no
 * loop body runs, so a NULL buf is tolerated in that case.
 */
long
bread(void* buf, long nbytes)
{
	long sum = 0;
	register long *p, *next;	/* `register` retained from the original lmbench source */
	register char *end;

	p = (long*)buf;
	end = (char*)buf + nbytes;
	/* main loop: 128 longs per pass */
	for (next = p + 128; (void*)next <= (void*)end; p = next, next += 128) {
		sum +=
			p[0]+p[1]+p[2]+p[3]+p[4]+p[5]+p[6]+p[7]+
			p[8]+p[9]+p[10]+p[11]+p[12]+p[13]+p[14]+
			p[15]+p[16]+p[17]+p[18]+p[19]+p[20]+p[21]+
			p[22]+p[23]+p[24]+p[25]+p[26]+p[27]+p[28]+
			p[29]+p[30]+p[31]+p[32]+p[33]+p[34]+p[35]+
			p[36]+p[37]+p[38]+p[39]+p[40]+p[41]+p[42]+
			p[43]+p[44]+p[45]+p[46]+p[47]+p[48]+p[49]+
			p[50]+p[51]+p[52]+p[53]+p[54]+p[55]+p[56]+
			p[57]+p[58]+p[59]+p[60]+p[61]+p[62]+p[63]+
			p[64]+p[65]+p[66]+p[67]+p[68]+p[69]+p[70]+
			p[71]+p[72]+p[73]+p[74]+p[75]+p[76]+p[77]+
			p[78]+p[79]+p[80]+p[81]+p[82]+p[83]+p[84]+
			p[85]+p[86]+p[87]+p[88]+p[89]+p[90]+p[91]+
			p[92]+p[93]+p[94]+p[95]+p[96]+p[97]+p[98]+
			p[99]+p[100]+p[101]+p[102]+p[103]+p[104]+
			p[105]+p[106]+p[107]+p[108]+p[109]+p[110]+
			p[111]+p[112]+p[113]+p[114]+p[115]+p[116]+
			p[117]+p[118]+p[119]+p[120]+p[121]+p[122]+
			p[123]+p[124]+p[125]+p[126]+p[127];
	}
	/* tail: 16 longs per pass */
	for (next = p + 16; (void*)next <= (void*)end; p = next, next += 16) {
		sum +=
			p[0]+p[1]+p[2]+p[3]+p[4]+p[5]+p[6]+p[7]+
			p[8]+p[9]+p[10]+p[11]+p[12]+p[13]+p[14]+
			p[15];
	}
	/* tail: one long at a time */
	for (next = p + 1; (void*)next <= (void*)end; p = next, next++) {
		sum += *p;
	}
	return sum;
}
472
473 #pragma mark *** darbench routines
474
475
476 /*ARGSUSED*/
477 int
478 benchmark_initbatch(void *tsd)
479 {
480 /*
481 * initialize your state variables here second
482 */
483 tsd_t *ts = (tsd_t *)tsd;
484 int procs;
485
486 initialize_overhead(tsd);
487
488 ts->pids = (pid_t*)malloc(ts->procs * sizeof(pid_t));
489 if (ts->pids == NULL)
490 exit(1);
491 bzero((void*)ts->pids, ts->procs * sizeof(pid_t));
492 procs = create_daemons(ts->p, ts->pids,
493 ts->procs, ts->process_size);
494 if (procs < ts->procs) {
495 cleanup(tsd);
496 exit(1);
497 }
498 return (0);
499 }
500
/* No per-run teardown is needed for this benchmark. */
int
benchmark_finirun()
{
	return (0);
}
506
507 int
508 benchmark_init()
509 {
510 /*
511 * the lm_optstr must be defined here or no options for you
512 *
513 * ...and the framework will throw an error
514 *
515 */
516 (void) sprintf(lm_optstr, "s:");
517 /*
518 * working hypothesis:
519 *
520 * tsd_t is the struct that we can pass around our
521 * state info in
522 *
523 * lm_tsdsize will allocate the space we need for this
524 * structure throughout the rest of the framework
525 */
526 lm_tsdsize = sizeof (tsd_t);
527
528 (void) sprintf(lm_usage,
529 " [-s kbytes]\n"
530 " processes [processes ...]\n");
531
532 return (0);
533 }
534
/* Nothing global to tear down. */
int
benchmark_fini()
{
	return (0);
}
540
/*
 * Per-batch teardown.  The original body duplicated cleanup() line for
 * line (close pipes, kill and reap children, free the pid table);
 * delegate to it instead of maintaining two copies.
 */
int
benchmark_finibatch(void *tsd)
{
	cleanup(tsd);
	return (0);
}
562
/* This benchmark reports no extra result text: return an empty string. */
char *
benchmark_result()
{
	static char empty = '\0';

	return (&empty);
}
569
/*ARGSUSED*/
/* Nothing per-worker to tear down; batches clean up after themselves. */
int
benchmark_finiworker(void *tsd)
{
	return (0);
}
575
576 int
577 benchmark_optswitch(int opt, char *optarg)
578 {
579
580 switch (opt) {
581 case 's':
582 opts = sizetoint(optarg);
583 break;
584 default:
585 return (-1);
586 }
587 return (0);
588 }
589
590 int
591 benchmark_initworker(void *tsd)
592 {
593 tsd_t *ts = (tsd_t *)tsd;
594
595 ts->process_size = opts;
596
597 return (0);
598 }
599
/* No run-wide setup is required. */
int
benchmark_initrun()
{
	return (0);
}
605
606 int
607 benchmark(void *tsd, result_t *res)
608 {
609 /*
610 * initialize your state variables here last
611 *
612 * and realize that you are paying for your initialization here
613 * and it is really a bad idea
614 */
615 tsd_t *ts = (tsd_t *)tsd;
616 int i;
617 int msg=1;
618
619 for (i = 0; i < lm_optB; i++) {
620 if (write(ts->p[0][1], &msg, sizeof(msg)) !=
621 sizeof(msg)) {
622 debug("read/write on pipe");
623 exit(1);
624 }
625 if (read(ts->p[ts->procs-1][0], &msg, sizeof(msg)) != sizeof(msg)) {
626 debug("read/write on pipe");
627 exit(1);
628 }
629 bread(ts->data, ts->process_size);
630 }
631 res->re_count = i;
632
633 return (0);
634 }